Ejemplo n.º 1
0
    def remove_op_created_hook(self, handle: int):
        """Unregister a previously registered Op created hook.

        The removal is delegated to the current context.

        Args:
            handle (int): Handle identifying the hook; it should be the value
                returned by `Graph.register_op_created_hook`.
        """
        current_ctx = get_current_context()
        current_ctx.remove_op_created_hook(handle)
Ejemplo n.º 2
0
def softmax(t: Tensor, axis: int) -> Tensor:
    """
    Computes the softmax operation on a Tensor. This rescales the slices of
    axis such that all elements are within [0, 1] and sum to 1.
    The output shape and dtype matches the input.

    Args:
        t: Tensor
            Tensor to be softmaxed.
        axis: int
            The axis along which the softmax will be computed. It is passed
            through `handle_negative_axis` before being handed to the op.
    Returns:
        out: Tensor
            The softmaxed tensor
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t)

    settings = ctx._get_op_settings('softmax')
    opid = _ir.OperatorIdentifier("ai.onnx", "Softmax", 11,
                                  _ir.NumInputs(1, 1), 1)

    inputs = {0: t.id}
    # Plain literal: the name has no placeholders, so no f-string is needed.
    outputs = {0: g._create_tensor_id("softmax_out")}
    normalised_axis = handle_negative_axis(t, axis)

    op = pb_g.createConnectedOp_SoftmaxOp(inputs, outputs, opid,
                                          normalised_axis, settings)

    return Tensor._from_pb_tensor(op.outTensor(0))
Ejemplo n.º 3
0
def gelu(t: Tensor) -> Tensor:
    """
    Computes the Gelu activation on a Tensor.
    https://arxiv.org/abs/1606.08415

    Args:
        t: Tensor
            Input tensor. Must belong to the current graph.
    Returns:
        out: Tensor
            Output tensor.
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t)

    settings = ctx._get_op_settings('gelu')
    opid = _ir.OperatorIdentifier("ai.graphcore", "Gelu", 1,
                                  _ir.NumInputs(1, 1), 1)

    inputs = {0: t.id}
    # Plain literal: the name has no placeholders, so no f-string is needed.
    outputs = {0: g._create_tensor_id("gelu_out")}

    op = pb_g.createConnectedOp_GeluOp(inputs, outputs, opid, settings)

    return Tensor._from_pb_tensor(op.outTensor(0))
Ejemplo n.º 4
0
def transpose(t: Tensor,
              permutation: Optional[Tuple[int, ...]] = None) -> Tensor:
    """
    Reorder the axes of a Tensor. When no permutation is given, the axes of
    `t` are reversed.

    Args:
        t: Tensor
            Tensor to be transposed.
        permutation: tuple of ints (optional)
            A permutation of [0, N-1], where N is the rank of input `t`.
            If omitted, the axes will be reversed.
    Returns:
        out: Tensor
            The transposed tensor
    """
    # The permutation is resolved before the graph membership check.
    permutation = _handle_permuation(t, permutation)

    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    op_settings = context._get_op_settings('transpose')
    op_identifier = _ir.OperatorIdentifier("ai.onnx", "Transpose", 1,
                                           _ir.NumInputs(1, 1), 1)

    inputs = {0: t.id}
    # The output is named after the input tensor.
    outputs = {0: graph._create_tensor_id(f"{t.name}_T")}

    transpose_op = pb_graph.createConnectedOp_TransposeOp(
        inputs, outputs, op_identifier, permutation, op_settings)

    return Tensor._from_pb_tensor(transpose_op.outTensor(0))
Ejemplo n.º 5
0
def mul(lhs: Tensor, rhs: Tensor) -> Tensor:
    """
    Element-wise multiplication of two Tensors.
    Follows numpy broadcasting rules.
    Arguments must have the same dtype.

    Args:
        lhs, rhs: Tensor
            Tensors to be multiplied.
    Returns:
        mul: Tensor
            The product of lhs and rhs
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, lhs, rhs)

    op_settings = context._get_op_settings('mul')
    op_identifier = _ir.OperatorIdentifier("ai.onnx", "Mul", 7,
                                           _ir.NumInputs(2, 2), 1)

    inputs = {0: lhs.id, 1: rhs.id}
    outputs = {0: graph._create_tensor_id("mul_out")}

    mul_op = pb_graph.createConnectedOp_MulOp(inputs, outputs, op_identifier,
                                              op_settings)

    return Tensor._from_pb_tensor(mul_op.outTensor(0))
Ejemplo n.º 6
0
def host_store(d2h_stream: DeviceToHostStream, t: Tensor) -> None:
    """
    Host Store: an op representing the transfer of data from the device to
    the host. It uses the existing device to host transfers created when
    building the IR, but defers the actual poplar::Copy until the op itself
    runs, which allows the copy to be scheduled as part of the normal op
    scheduling.

    Args:
        d2h_stream (DeviceToHostStream): The stream to store to. Its stream
            tensor must be in the main graph, and its dtype and shape must
            match those of `t`.
        t (Tensor): The input tensor to copy to host.

    Raises:
        ValueError: If the dtype or the shape of `d2h_stream` differs from
            that of `t`.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)
    # The stream tensor is checked against the main graph, not the current one.
    check_in_graph(graph.ir().main_graph(), d2h_stream._stream_tensor)

    if d2h_stream.dtype != t.dtype:
        dtype_error = f'dtype of stream {d2h_stream.tensor_id()} `{d2h_stream.dtype}` does not match dtype of provided tensor `{t.dtype}`'
        raise ValueError(dtype_error)
    if d2h_stream.shape != t.shape:
        shape_error = f'shape of stream {d2h_stream.tensor_id()} `{d2h_stream.shape}` does not match shape of provided tensor `{t.shape}`'
        raise ValueError(shape_error)

    op_identifier = _ir.OperatorIdentifier("ai.graphcore", "HostStore", 1,
                                           _ir.NumInputs(1), 0)

    inputs = {0: t.id}
    op_settings = context._get_op_settings('host_store')
    stream_id = d2h_stream.tensor_id()

    # The op has no outputs, hence the empty output map.
    pb_graph.createConnectedOp_HostStoreOp(inputs, {}, op_identifier,
                                           op_settings, stream_id)
Ejemplo n.º 7
0
def cast(t: Tensor, data_type: dtype) -> Tensor:
    """
    Casts tensor `t` to data type `data_type`.

    Args:
        t: Tensor
            Tensor to be cast.
        data_type: popart.ir.dtypes.dtype
            Dtype to cast to
    Returns:
        out: Tensor
            The output tensor of the cast op.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    op_settings = context._get_op_settings('cast')
    op_identifier = _ir.OperatorIdentifier("ai.onnx", "Cast", 9,
                                           _ir.NumInputs(1, 1), 1)

    inputs = {0: t.id}
    # The output name combines the input tensor id and the target dtype name.
    outputs = {0: graph._create_tensor_id(f"{t.id}_{data_type._name}")}

    cast_op = pb_graph.createConnectedOp_CastOp(
        inputs,
        outputs,
        _to=data_type._pb_dtype,
        opid=op_identifier,
        settings=op_settings,
    )

    return Tensor._from_pb_tensor(cast_op.outTensor(0))
Ejemplo n.º 8
0
def increment_mod_(t: Tensor, increment: float, modulus: float) -> Tensor:
    """
    Compute `(t + increment) % modulus` inplace on `t`

    Args:
        t: Tensor
            Tensor to increment (modulo)
        increment: float
            How much to increment the input tensor by.
        modulus: float
            The modulo operand.
    Returns:
        out: Tensor (alias of input `t`)
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    op_settings = context._get_op_settings('increment_mod_inplace')

    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id("increment_mod_inplace_out")}

    inplace_op = pb_graph.createConnectedOp_IncrementModInplaceOp(
        inputs, outputs, increment, modulus, op_settings)

    return Tensor._from_pb_tensor(inplace_op.outTensor(0))
Ejemplo n.º 9
0
def copy_var_update_(t: Tensor, X: Tensor) -> Tensor:
    """
    Inplace update of tensor `t` by copying `X` into it.

    Args:
        t: Tensor
            Tensor to be updated.
        X: Tensor
            Value to update the variable
    Returns:
        updated: Tensor
            An alias to the variable.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t, X)

    op_settings = context._get_op_settings('copy_var_update')

    inputs = {0: t.id, 1: X.id}
    # The output is named after the tensor being updated.
    outputs = {0: graph._create_tensor_id('updated__' + t.name)}

    update_op = pb_graph.createConnectedOp_CopyVarUpdateOp(
        inputs, outputs, op_settings)

    return Tensor._from_pb_tensor(update_op.outTensor(0))
Ejemplo n.º 10
0
def replicated_all_gather(t: Tensor,
                          group: Optional[CommGroup] = None) -> Tensor:
    """Gathers tensor `t` across replicas. The output tensor contains the values of `t` from each replica.

    Args:
        t (Tensor): Tensor to be gathered. Must be rank=1.
        group (Optional[CommGroup], optional): Replicas to gather from. Defaults to All replicas.

    Returns:
        Tensor: Gathered tensor.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    # With no explicit group, fall back to a default-constructed CommGroup.
    comm_group = CommGroup() if group is None else group

    op_settings = context._get_op_settings('replicated_all_gathered')
    op_identifier = _ir.OperatorIdentifier("ai.graphcore",
                                           "ReplicatedAllGather", 1,
                                           _ir.NumInputs(1, 1), 1)

    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id(t.name + "_all_gathered")}

    gather_op = pb_graph.createConnectedOp_ReplicatedAllGatherOp(
        inputs, outputs, op_identifier, comm_group, op_settings)

    return Tensor._from_pb_tensor(gather_op.outTensor(0))
Ejemplo n.º 11
0
def replicated_reduce_scatter(t: Tensor,
                              op: CollectiveOperator = CollectiveOperator.Add,
                              group: Optional[CommGroup] = None) -> Tensor:
    """Reduces tensor `t` across replicas. Each replica will only receive a unique slice of `t`.

    Args:
        t (Tensor): Tensor to be reduced. Inputs will be flattened.
        op (CollectiveOperator, optional): Operation to reduce with. Defaults to CollectiveOperator.Add.
        group (Optional[CommGroup], optional): Replicas to reduce across. Defaults to All replicas.

    Returns:
        Tensor: A slice of the reduced tensor. Always a 1D tensor.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    # With no explicit group, fall back to a default-constructed CommGroup.
    comm_group = CommGroup() if group is None else group

    op_settings = context._get_op_settings('replicated_reduce_scatter')
    op_identifier = _ir.OperatorIdentifier("ai.graphcore",
                                           "ReplicatedReduceScatter", 1,
                                           _ir.NumInputs(1, 1), 1)

    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id(t.name + "_reduce_scattered")}

    # Separate local name so the created op does not shadow the `op`
    # (collective operator) parameter.
    reduce_scatter_op = pb_graph.createConnectedOp_ReplicatedReduceScatterOp(
        inputs, outputs, op_identifier, op, comm_group, op_settings)

    return Tensor._from_pb_tensor(reduce_scatter_op.outTensor(0))
Ejemplo n.º 12
0
def sub(lhs: Tensor, rhs: Tensor) -> Tensor:
    """Element-wise subtraction of two Tensors. Follows numpy broadcasting rules. Arguments must have the same dtype.
        Args:
            lhs, rhs: Tensor
                Tensors to be subtracted.
        Returns:
            sub: Tensor
                The value of (lhs - rhs)"""
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, lhs, rhs)

    op_settings = context._get_op_settings('sub')
    op_identifier = _ir.OperatorIdentifier("ai.onnx", "Sub", 7,
                                           _ir.NumInputs(2, 2), 1)

    inputs = {0: lhs.id, 1: rhs.id}
    outputs = {0: graph._create_tensor_id("sub_out")}

    sub_op = pb_graph.createConnectedOp_SubtractOp(inputs, outputs,
                                                   op_identifier, op_settings)

    return Tensor._from_pb_tensor(sub_op.outTensor(0))
Ejemplo n.º 13
0
def merge_exchange():
    """Combine RemoteLoad/RemoteStore/HostLoad/HostStore operations into a single MergeExchange operation.
        This guarantees that any external synchronisation for these operations are merged allowing for the operations
        to execute in parallel.

        Only applies to operations in the current graph. Used as a contextmanager:
        ```
        with pir.merge_exchange():
            ops.host_load(..)
            ops.host_store(..)
        ```

        Note: Operations must be able to be scheduled in any order to be merged. For this reason it is recommended to combine with
            `with pir.in_sequence(False)` to avoid topological constraints that would prevent merging.

        If the managed body raises, the op created hook is still removed, the
        merge transform is not applied, and the exception propagates.
    """
    # NOTE(review): this is a generator function; presumably it is wrapped by
    # a `@contextmanager` decorator that is not visible here -- confirm.
    ctx = get_current_context()
    graph = ctx.graph
    ops: Set[int] = set()

    def hook(op: _ir.Op):
        # Record the id of every op created while the context is active.
        ops.add(op.id)

    handle = graph.register_op_created_hook(hook)
    try:
        yield
    finally:
        # Always unregister the hook, even when the body raised; otherwise it
        # would stay registered on the graph and keep collecting op ids.
        graph.remove_op_created_hook(handle)

    # Only reached when the body completed without raising.
    ops_created = _ir.transforms.MergeExchange().applyToOps(
        graph._pb_graph, ops)
    for op in ops_created:
        ctx._op_created(op)
Ejemplo n.º 14
0
def detach_(x: Tensor) -> Tensor:
    """
    Inplace version of :func:`~ops.detach`. Behaviour is the same, but
        gradient propagation is blocked inplace on the input tensor.

    Args:
        x: Tensor
            Input tensor.
    Returns:
        out: Tensor
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, x)

    op_settings = context._get_op_settings('detach_inplace')

    inputs = {0: x.id}
    outputs = {0: graph._create_tensor_id("detach_out")}

    detach_op = pb_graph.createConnectedOp_DetachInplaceOp(
        inputs, outputs, op_settings)

    return Tensor._from_pb_tensor(detach_op.outTensor(0))
Ejemplo n.º 15
0
def equal(lhs: Tensor, rhs: Tensor) -> Tensor:
    """
    Compares two Tensors element-wise with an equal operator.
    Follows numpy broadcasting rules.

    Args:
        lhs, rhs: Tensor
            Tensors to be compared. Both must belong to the current graph.
    Returns:
        out: Tensor
            The value (lhs == rhs)
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, lhs, rhs)

    settings = ctx._get_op_settings('equal')
    opid = _ir.OperatorIdentifier("ai.onnx", "Equal", 7, _ir.NumInputs(2, 2),
                                  1)
    # Create an EqualOp so the op class matches the "Equal" operator
    # identifier; the previous code created an AndOp (logical AND) here.
    op = pb_g.createConnectedOp_EqualOp(
        {
            0: lhs.id,
            1: rhs.id
        },
        {
            0: g._create_tensor_id("equal_out"),
        },
        opid,
        settings,
    )

    return Tensor._from_pb_tensor(op.outTensor(0))
Ejemplo n.º 16
0
def logical_not(t: Tensor) -> Tensor:
    """
    Element-wise computation of NOT t.
    Inputs will be cast to bool if needed.

    Args:
        t: Tensor
            Input tensor.
    Returns:
        out: Tensor
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    # Use a bool version of the input as the op's operand.
    bool_input = cast_if_needed(t, dtypes.bool)

    op_settings = context._get_op_settings('not')
    op_identifier = _ir.OperatorIdentifier("ai.onnx", "Not", 1,
                                           _ir.NumInputs(1, 1), 1)

    inputs = {0: bool_input.id}
    outputs = {0: graph._create_tensor_id("not_out")}

    not_op = pb_graph.createConnectedOp_NotOp(inputs, outputs, op_identifier,
                                              op_settings)

    return Tensor._from_pb_tensor(not_op.outTensor(0))
Ejemplo n.º 17
0
def slice(t: Tensor,
          start: Optional[Union[int, List[Optional[int]]]] = None,
          stop: Optional[Union[int, List[Optional[int]]]] = None,
          step: Optional[Union[int, List[Optional[int]]]] = None,
          axis: Optional[Union[int, List[int]]] = None) -> Tensor:
    """
    Selects elements from a tensor using a slice or multiple slices.

    A slice is described by the start (inclusive) and stop (exclusive) index of the elements to select.
    Several slices can be given at once by passing a list for each parameter (start, stop, step).
    A step of `-1` performs the slice backwards.

    When axis is not specified, each slice corresponds to axis 0 to N, where N is the number of slices.

    If none of start, stop and step is given, `t` itself is returned and no op is created.

    Examples:
    ```
    t == slice(t) == slice(t, axis=1)
    slice(t, start=1)           # Slice axis 0 from start index 1
    slice(t, start=[1,2]) == slice(t, start=[1,2], axis=[0,1])
    slice(t, stop=-2)           # Slice axis 0 upto second last element (exclusive)
    slice(t, stop=3, step=-1)   # Slice backwards from last element (inclusive) to third last element (exclusive)
    ```

    Args:
        t (Tensor): Tensor to slice
        start: Index of first element (inclusive) or `None` which defaults to 0.
        stop: Index of last element (exclusive) or `None` which defaults to last
            element (inclusive) if step is forward or first element (inclusive) if step is backwards.
        step: `1` for forward or `-1` for backwards.
        axis: Axis of tensor to slice on or `None` will default to each axis sequentially.

    Returns:
        Tensor: output tensor
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    # Nothing to slice: hand back the input unchanged.
    nothing_requested = all(arg is None for arg in (start, stop, step))
    if nothing_requested:
        return t

    starts, stops, steps, axes = process_args(start, stop, step, axis)

    op_identifier = _ir.OperatorIdentifier("ai.onnx", "Slice", 11,
                                           _ir.NumInputs(1, 1), 1)
    op_settings = context._get_op_settings("slice")

    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id("slice_out")}

    slice_op = pb_graph.createConnectedOp_SliceOp(
        inputs,
        outputs,
        starts_=starts,
        ends_=stops,
        axes_=axes,
        steps_=steps,
        opid=op_identifier,
        settings=op_settings,
    )

    return Tensor._from_pb_tensor(slice_op.outTensor(0))
Ejemplo n.º 18
0
def init(shape: Iterable[int], dtype: dtypes.dtype,
         name: Optional[str] = None) -> Tensor:
    """
    Init Op: create a tensor with zero values.
        The returned tensor is not considered a variable.

    Args:
        shape (Iterable[int]): Shape of the output tensor.
        dtype (dtypes.dtype): Data type for the output Tensor
        name (str): Name passed to the graph when creating the id of the
            output tensor.

    Returns:
        Tensor: The zero-initialised output tensor.
    """
    context = get_current_context()
    graph = context.graph

    pb_graph = graph._pb_graph
    tensor_info = _ir.TensorInfo(dtype._pb_dtype, list(shape))

    init_identifier = _ir.OperatorIdentifier("ai.graphcore", "Init", 1,
                                             _ir.NumInputs(0), 1)

    # The op takes no inputs, hence the empty input map.
    outputs = {0: graph._create_tensor_id(name)}
    op_settings = context._get_op_settings('init')

    init_op = pb_graph.createConnectedOp_InitOp(
        {},
        outputs,
        init_identifier,
        tensor_info,
        _ir.TensorType.ActGrad,
        _ir.InitType.Zero,
        op_settings,
        -1,
    )

    return Tensor._from_pb_tensor(init_op.outTensor(0))
Ejemplo n.º 19
0
def detach(t: Tensor) -> Tensor:
    """
    Prevents gradient computation of this tensor.
    Numerically equivalent to the identity op.

    Args:
        t: Tensor
            Input tensor.
    Returns:
        out: Tensor
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    op_settings = context._get_op_settings('detach')
    op_identifier = _ir.OperatorIdentifier("ai.graphcore", "Detach", 1,
                                           _ir.NumInputs(1, 1), 1)

    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id("detach_out")}

    detach_op = pb_graph.createConnectedOp_DetachOp(inputs, outputs,
                                                    op_identifier,
                                                    op_settings)

    return Tensor._from_pb_tensor(detach_op.outTensor(0))
Ejemplo n.º 20
0
def dynamic_slice(t: Tensor, index: Tensor, axes: List[int], sizes: List[int],
                  no_overlap: bool) -> Tensor:
    """
    Returns a cloned slice of the input Tensor.

    The word "dynamic" refers to the fact that the index can be specified
    during runtime.

    A slice along an axis can be defined by the tuple
    ( start, stop, step )
    start - will be equal to the index for the respective axis
    stop - will be equal to index + size for the respective axis
    step - will equal 1

    Limitations:
    Assuming we would like to slice A with dimension (4, 3)
    - Step other than 1 is not supported (i.e. t[::2,:] is not supported)
    - Negative slicing is not supported (i.e. t[:-1,:] is not supported)
    - stop greater than the size of the axis is not supported
     (i.e. t[:5,:] is not supported)

    Args:
        t: Tensor
            Input tensor.
        index: Tensor
            The indices to start the slice from.
        axes: List[int]
            The axes to slice from.
        sizes: List[int]
            The sizes of the slices for the specified axes.
            For example:
            If index = [1, 2], axes = [0, 3] and sizes = [2, 4], the Tensor will be sliced
            t[1:3, :, :, 2:6]
        no_overlap : bool
            If set to true, then correct gradient backpropagation is only guaranteed if
            each region in the output tensor has exactly one populator
            (operation that writes data to this region).
            There are no run-time or compile-time checks possible to ensure this.
    Returns:
        out: Tensor
            A clone (i.e. not a view) of the sliced input tensor.
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t, index)

    settings = ctx._get_op_settings('dynamicslice')
    opid = _ir.OperatorIdentifier("ai.graphcore", "DynamicSlice", 1,
                                  _ir.NumInputs(2, 2), 1)

    inputs = {0: t.id, 1: index.id}
    # Plain literal: the name has no placeholders, so no f-string is needed.
    outputs = {0: g._create_tensor_id("dynamic_slice_out")}

    op = pb_g.createConnectedOp_DynamicSliceOp(inputs, outputs, opid, axes,
                                               sizes, no_overlap, settings)

    return Tensor._from_pb_tensor(op.outTensor(0))
Ejemplo n.º 21
0
def tied_gather(t: Tensor,
                indices: Tensor,
                axis: int = 0,
                available_memory_proportion: Optional[float] = None) -> Tensor:
    """
    Select multiple elements from an array, given by `indices`, along a specified axis.

    When `axis == 0`, it is equivalent to numpy "fancy indexing".

    Numerically the same as the `gather` op but does not specify the tile
    layout of the `indices` tensor. When preceding a `matmul` op the tile
    layout of the indices is determined by the `matmul`, not the `tied_gather`.
    This has a lower memory footprint but costs extra cycles due to the exchange.

    Pseudo example:
    ```
    tied_gather(x, [1, 2, 3]) == [x[1], x[2], x[3]]
    ```

    Args:
        t: Tensor
            Input tensor
        indices: Tensor
            The indices of the elements to extract
        axis: int
            Which axis to gather on. Default is 0.
        available_memory_proportion: Optional[float]
            The maximum proportion of available memory on each tile that this layer
            should consume temporarily during the course of the operation.
            Defaults to 1.0 if not set globally.

    Returns:
        gather: Tensor
            The gathered elements concatenated.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)
    check_in_graph(graph, indices)

    # Convert the optional float into the form expected by the op binding.
    memory_proportion = convert_optional_float(available_memory_proportion)

    op_settings = context._get_op_settings("tiedgather")

    inputs = {0: t.id, 1: indices.id}
    outputs = {0: graph._create_tensor_id("tiedgather_out")}

    gather_op = pb_graph.createConnectedOp_TiedGatherOp(
        inputs,
        outputs,
        axis_=axis,
        available_memory_proportion_=memory_proportion,
        settings=op_settings)

    return Tensor._from_pb_tensor(gather_op.outTensor(0))
Ejemplo n.º 22
0
def accumulate_mean_(t: Tensor, X: Tensor, step: Union[float,
                                                       Tensor]) -> Tensor:
    """
    Inplace update of tensor `t` using `t = (step/(step+1)) * t + (1/(step+1)) * X`.
    Intended to be used to keep track of the mean of a series of values.

    For example:
    ```
    with g:
        accum = pir.variable(0, dtype=pir.float32)
        a = pir.variable(1, dtype=pir.float32)
        b = pir.variable(2, dtype=pir.float32)
        accumulate_mean_(accum, a, 0.0)
        accumulate_mean_(accum, b, 1.0)
    ```
    will result with `accum` having the value `(a+b)/2 = 1.5`.

    Does not apply numpy broadcasting.
    Uses mixed precision poplibs operations.
    `t` and `X` must be the same shape, but can be different types.
    `step` must be scalar.

    Args:
        `t`: Tensor
            Tensor to be updated.
        `X`: Tensor
            Value to update the variable
        step: Union[float, Tensor]
            Value representing the number of previously accumulated values.
    Returns:
        updated: Tensor
            An alias to the variable.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    # Make sure `step` is a tensor (float32 is requested) before the check.
    step_tensor = t._ensure_tensor(step, dtype=dtypes.float32)

    check_in_graph(graph, t, X, step_tensor)

    op_settings = context._get_op_settings('accumulate')

    inputs = {0: t.id, 1: X.id, 2: step_tensor.id}
    outputs = {0: graph._create_tensor_id('accumulate_mean__' + t.name)}

    accumulate_op = pb_graph.createConnectedOp_AccumulateOp(
        inputs,
        outputs,
        _ir.AccumulationType.Mean,
        _ir.OptimizerValue(),
        op_settings,
    )

    return Tensor._from_pb_tensor(accumulate_op.outTensor(0))
Ejemplo n.º 23
0
def split(t: Tensor,
          splits: Union[int, List[int]],
          axis: int = 0) -> List[Tensor]:
    """
    Splits a tensor on a given axis into a list of tensors.

    Args:
        t: Tensor
            Tensor to be split.
        splits: int or List[int]
            Either an int which specifies the number of splits or a list of ints specifying the length of each output tensor.
        axis: int (default 0)
            Which axis to split on
    Returns:
        out: List[Tensor]
            A list of tensors
    Raises:
        ValueError: If `splits` is an int that is not positive, or that does
            not equally divide the length of the chosen axis.
    """

    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t)

    axis = handle_negative_axis(t, axis)

    if isinstance(splits, int):
        # Reject non-positive counts up front: 0 would otherwise raise a
        # ZeroDivisionError below and a negative count could silently
        # produce an empty list of outputs.
        if splits <= 0:
            raise ValueError(
                f"Number of splits must be a positive integer, got {splits}."
            )
        axis_len = t.shape[axis]
        if axis_len % splits != 0:
            raise ValueError(
                f"Split {splits} does not equally divide tensor axis {axis} of length {axis_len}."
            )
        # Expand the count into one equal length per output tensor.
        splits = [axis_len // splits] * splits

    num_outputs = len(splits)

    outputs_t = {
        i: g._create_tensor_id(f"{t.name}_split_{i}")
        for i in range(num_outputs)
    }

    settings = ctx._get_op_settings('split')
    opid = _ir.OperatorIdentifier("ai.onnx", "Split", 2, _ir.NumInputs(1, 1),
                                  1)
    op = pb_g.createConnectedOp_SplitOp(
        {0: t.id},
        outputs_t,
        axis_=axis,
        split_=splits,
        opid=opid,
        settings=settings,
    )

    return [
        Tensor._from_pb_tensor(op.outTensor(i)) for i in range(num_outputs)
    ]
Ejemplo n.º 24
0
def remote_store(
        t: Tensor,
        offset: Optional[Tensor] = None,
        remote_buffer_handle: Optional[RemoteBufferHandle] = None) -> None:
    """Store the input tensor to a remote (off-chip) buffer.

    This Op is typically used to store several different identically shaped
    tensors to the same remote buffer by specifying the offset (see below).

    Op instances with matching `remote_buffer_id` (specified in the
    `remote_buffer_handle`) will outline together, meaning that if multiple
    different tensors are to be stored under the same remote buffer ID, a
    different `offset` value has to be supplied for each tensor.

    The `remote_buffer_handle` handles the relationship between
    `remote_buffer_id`, shape and datatype, as shape and datatype need to be
    fixed for each `remote_buffer_id`.

    All `offset`s and `remote_buffer_id`s need to be >= 0.

    If `t` is of rank `x`, the remote buffer of a certain `remote_buffer_id`
    will be of rank `x+1`, where the new dimension (the row) will be of size
    `N`.

    See also: `remote_buffer_handle`, `remote_load`.

    Args:
        t (Tensor): Tensor to copy and store in the remote buffer.
        offset (Optional[Tensor], optional): Optional 0-rank Tensor.
          Specify the row in the remote buffer the inTensor will be written to.
          Defaults to None.
        remote_buffer_handle (Optional[RemoteBufferHandle], optional): The handle to the remote
          buffer. Defaults to None.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    has_offset = offset is not None
    if has_offset:
        check_in_graph(graph, offset)

    buffer_handle = prepare_remote_buffer(t, remote_buffer_handle, graph)

    op_settings = context._get_op_settings('remote_store')
    op_identifier = _ir.OperatorIdentifier("ai.graphcore", "RemoteStore", 1,
                                           _ir.NumInputs(1, 2), 0)

    # The offset tensor is connected as input 1 only when it was provided.
    inputs = {0: t.id}
    if has_offset:
        inputs[1] = offset.id

    # The op has no outputs, hence the empty output map.
    pb_graph.createConnectedOp_RemoteStoreOp(inputs, {}, op_identifier,
                                             op_settings,
                                             buffer_handle.remote_buffer_id)
Ejemplo n.º 25
0
def remote_load(
        t: Tensor,
        offset: Optional[Tensor] = None,
        remote_buffer_handle: Optional[RemoteBufferHandle] = None) -> Tensor:
    """Load a tensor from remote (off-chip) buffer.

    The tensor is loaded from the memory location corresponding to
    `remote_buffer_id` (specified in the `remote_buffer_handle`), and is
    stored in the memory location corresponding to `t`.

    The relationship between `offset` and `remote_buffer_id` is thoroughly
    described in `remote_store`.

    See also: `remote_buffer_handle`, `remote_store`, `remote_load_`

    Args:
        t (Tensor): This tensor will be cloned, and the loaded data will be written to the clone.
        offset (Optional[Tensor], optional): Optional 0-rank Tensor.
          Specify the row in the remote buffer the inTensor will be loaded from.
          Defaults to None.
        remote_buffer_handle (Optional[RemoteBufferHandle], optional): The handle to the remote
          buffer. Defaults to None.
    Returns:
        Tensor: The tensor loaded from the remote buffer
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    has_offset = offset is not None
    if has_offset:
        check_in_graph(graph, offset)

    buffer_handle = prepare_remote_buffer(t, remote_buffer_handle, graph)

    op_settings = context._get_op_settings('remote_load')
    op_identifier = _ir.OperatorIdentifier("ai.graphcore", "RemoteLoad", 1,
                                           _ir.NumInputs(1, 2), 1)

    # The offset tensor is connected as input 1 only when it was provided.
    inputs = {0: t.id}
    if has_offset:
        inputs[1] = offset.id
    outputs = {0: graph._create_tensor_id("remote_load_out")}

    load_op = pb_graph.createConnectedOp_RemoteLoadOp(
        inputs, outputs, op_identifier, op_settings,
        buffer_handle.remote_buffer_id)

    return Tensor._from_pb_tensor(load_op.outTensor(0))
Ejemplo n.º 26
0
def gather(t: Tensor,
           indices: Tensor,
           axis: int = 0,
           available_memory_proportion: Optional[float] = None) -> Tensor:
    """
    Select multiple elements from an array, given by `indices`, along a specified axis.

    When `axis == 0`, it is equivalent to numpy "fancy indexing".

    Pseudo example:
    ```
    gather(x, [1, 2, 3]) == [x[1], x[2], x[3]]
    ```

    Args:
        t: Tensor
            Input tensor
        indices: Tensor
            The indices of the elements to extract
        axis: int
            Which axis to gather on. Default is 0.
        available_memory_proportion: Optional[float]
            The maximum proportion of available memory on each tile that this layer
            should consume temporarily during the course of the operation.
            Defaults to 1.0 if not set globally.

    Returns:
        gather: Tensor
            The gathered elements concatenated.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)
    check_in_graph(graph, indices)

    # Convert the optional float into the form expected by the op binding.
    memory_proportion = convert_optional_float(available_memory_proportion)

    op_identifier = _ir.OperatorIdentifier("ai.onnx", "Gather", 11,
                                           _ir.NumInputs(2, 2), 1)
    op_settings = context._get_op_settings("gather")

    inputs = {0: t.id, 1: indices.id}
    outputs = {0: graph._create_tensor_id("gather_out")}

    gather_op = pb_graph.createConnectedOp_GatherOp(
        inputs,
        outputs,
        opid=op_identifier,
        axis_=axis,
        available_memory_proportion_=memory_proportion,
        settings=op_settings)

    return Tensor._from_pb_tensor(gather_op.outTensor(0))
Ejemplo n.º 27
0
def remote_load_(
        t: Tensor,
        offset: Optional[Tensor] = None,
        remote_buffer_handle: Optional[RemoteBufferHandle] = None) -> Tensor:
    """Load a tensor from a remote (off-chip) buffer inplace.

    This op is identical to `remote_load` with the exception of how `t` is handled.
    In `remote_load` `t` is cloned and the output is written to the clone, whereas
    in this version `t` is written to directly.

    See also: `remote_buffer_handle`, `remote_store`, `remote_load`

    Args:
        t (Tensor): The tensor the loaded data will be written to.
        offset (Optional[Tensor], optional): Optional 0-rank Tensor.
          Specify the row in the remote buffer the inTensor will be loaded from.
          Defaults to None.
        remote_buffer_handle (Optional[RemoteBufferHandle], optional): The handle to the remote
          buffer. Defaults to None.
    Returns:
        Tensor: The tensor loaded from the remote buffer
    """
    ctx = get_current_context()
    graph = ctx.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    has_offset = offset is not None
    if has_offset:
        check_in_graph(graph, offset)

    handle = prepare_remote_buffer(t, remote_buffer_handle, graph)

    op_settings = ctx._get_op_settings('remote_load_inplace')
    load_opid = _ir.OperatorIdentifier("ai.graphcore", "RemoteLoadInplace", 1,
                                       _ir.NumInputs(1, 2), 1)

    # The offset tensor is only connected (as input 1) when one was supplied.
    op_inputs = {0: t.id}
    if has_offset:
        op_inputs[1] = offset.id
    op_outputs = {0: graph._create_tensor_id("remote_load_inplace_out")}

    load_op = pb_graph.createConnectedOp_RemoteLoadInplaceOp(
        op_inputs, op_outputs, load_opid, op_settings,
        handle.remote_buffer_id)

    return Tensor._from_pb_tensor(load_op.outTensor(0))
Ejemplo n.º 28
0
def ipu_copy(t: Tensor, destination: int,
             source: Optional[int] = None) -> Tensor:
    """
    Copies a Tensor to a virtual graph.

    Args:
        t: Tensor
            Tensor to be copied.
        destination: int
            Ipu for the tensor to be copied to.
        source: Optional[int]
            Ipu for the tensor to be copied from.
            By default, the source will be taken from the producer of `t`.
            If `t` does not have a producer a source MUST be provided.

    Returns:
        t_copied: Tensor
            The copied tensor

    Raises:
        ValueError: If `source` is not given and the virtual graph of `t`
            cannot be inferred.
    """
    ctx = get_current_context()
    graph = ctx.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    source_ipu = source
    if source_ipu is None:
        # Fall back to the virtual graph recorded on the underlying tensor;
        # a result of -1 is treated as "could not be inferred".
        source_ipu = t._pb_tensor.getVirtualGraphIdUnsafe()
        if source_ipu == -1:
            raise ValueError(
                f"Could not infer virtual graph for Tensor to be copied \"{t}\" . "
                "Please specify `source` when copying for this tensor.")

    op_settings = ctx._get_op_settings('ipucopy')
    copy_opid = _ir.OperatorIdentifier("ai.graphcore", "IpuCopy", 1,
                                       _ir.NumInputs(1, 1), 1)

    op_inputs = {0: t.id}
    # The output tensor is named after the input, suffixed with the destination.
    op_outputs = {0: graph._create_tensor_id(t.name + f"_c{destination}")}

    copy_op = pb_graph.createConnectedOp_IpuCopyOp(op_inputs, op_outputs,
                                                   copy_opid, source_ipu,
                                                   destination, op_settings)

    return Tensor._from_pb_tensor(copy_op.outTensor(0))
Ejemplo n.º 29
0
def _setup_call_and_repeat(
    pb_ir: _ir.Ir, pb_top_graph: _ir.Graph, pb_bottom_graph: _ir.Graph
) -> Tuple[_ir.Graph, _ir.op.CallOp, _ir.op.LoopOp]:
    """Create the middle (loop body) graph plus the call op and loop op that use it.

    The middle graph contains a call op that calls `pb_bottom_graph`; the top
    graph gets a loop op whose body is the middle graph. The middle graph is
    also given the loop iterator and loop condition inputs.

    Args:
        pb_ir (_ir.Ir): The _ir level Ir
        pb_top_graph (_ir.Graph): The _ir top level graph that will contain the loop op.
        pb_bottom_graph (_ir.Graph): The _ir user defined subgraph that will be called.

    Returns:
        Tuple[_ir.Graph, _ir.op.CallOp, _ir.op.LoopOp]: The created _ir-level middle graph, call op
            and loop op.
    """
    bottom_name = pb_bottom_graph.id.str()

    # The middle graph is the one that gets repeated by the loop op.
    wrapper_id = pb_ir.createUniqueSubgraphId(f"{bottom_name}__loop_wrapper")
    pb_middle_graph = pb_ir.createGraph(_ir.GraphId(wrapper_id))
    middle_name = pb_middle_graph.id.str()

    ctx = get_current_context()

    # Call op: lives in the middle graph and calls the bottom graph.
    call_opid = _ir.OperatorIdentifier("ai.graphcore", "Call", 1,
                                       _ir.NumInputs(), 0)
    call_settings = ctx._get_op_settings(middle_name + '__call__' +
                                         bottom_name)
    pb_callop = pb_middle_graph.createOp_CallOp(call_opid, pb_bottom_graph,
                                                call_settings)

    # Loop op: lives in the top graph and loops the middle graph.
    loop_opid = _ir.OperatorIdentifier("ai.onnx", "Loop", 11, _ir.NumInputs(),
                                       0)
    loop_settings = ctx._get_op_settings(pb_top_graph.id.str() + '__loop__' +
                                         middle_name)
    pb_loop_op = pb_top_graph.createOp_LoopOp(loop_opid, loop_settings,
                                              pb_middle_graph)

    # Mandatory loop iterator: a scalar INT32 input that is not an output.
    iterator_id = _ir.addScope(pb_middle_graph, "Iterator___")
    iterator_info = _ir.TensorInfo(_ir.DataType.INT32, ())
    pb_middle_graph.addInput(iterator_id, iterator_info)

    # Mandatory loop condition: a scalar BOOL input that is also an output.
    condition_id = _ir.addScope(pb_middle_graph, "LoopCond___")
    condition_info = _ir.TensorInfo(_ir.DataType.BOOL, ())
    pb_middle_graph.addInput(condition_id, condition_info)
    pb_middle_graph.markAsOutput(condition_id)

    return pb_middle_graph, pb_callop, pb_loop_op
Ejemplo n.º 30
0
def slice_(t: Tensor,
           start: Optional[Union[int, List[Optional[int]]]] = None,
           stop: Optional[Union[int, List[Optional[int]]]] = None,
           step: Optional[Union[int, List[Optional[int]]]] = None,
           axis: Optional[Union[int, List[int]]] = None) -> Tensor:
    """
    Selects elements from a tensor using a slice or multiple slices. Inplace.

    This is the inplace version of :func:`~ops.slice`. Behaviour is the same, but modifies the
    tensor inplace.

    If `start`, `stop` and `step` are all `None`, `t` itself is returned and
    no op is added to the graph.

    Args:
        t (Tensor): Tensor to slice
        start: Index of first element (inclusive) or `None` which defaults to 0.
        stop: Index of last element (exclusive) or `None` which defaults to last
            element (inclusive) if step is forward or first element (inclusive) if step is backwards.
        step: `1` for forward or `-1` for backwards.
        axis: Axis of tensor to slice on or `None` will default to each axis sequentially.

    Returns:
        Tensor: alias of the input tensor t.
    """
    ctx = get_current_context()
    graph = ctx.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    # With no bounds and no step there is nothing to slice.
    if all(bound is None for bound in (start, stop, step)):
        return t

    starts, ends, steps, axes = process_args(start, stop, step, axis)

    op_settings = ctx._get_op_settings("slice_inplace")
    slice_opid = _ir.OperatorIdentifier("ai.graphcore", "SliceInplace", 1,
                                        _ir.NumInputs(1, 1), 1)

    op_inputs = {0: t.id}
    op_outputs = {0: graph._create_tensor_id("slice_out")}
    slice_op = pb_graph.createConnectedOp_SliceInplaceOp(
        op_inputs,
        op_outputs,
        starts_=starts,
        ends_=ends,
        axes_=axes,
        steps_=steps,
        opid=slice_opid,
        settings=op_settings,
    )

    return Tensor._from_pb_tensor(slice_op.outTensor(0))