def _setup_call_and_repeat(
        pb_ir: _ir.Ir, pb_top_graph: _ir.Graph, pb_bottom_graph: _ir.Graph
) -> Tuple[_ir.Graph, _ir.op.CallOp, _ir.op.LoopOp]:
    """Create the loop wrapper graph together with its call op and loop op.

    A new "middle" graph is created that holds a call op targeting
    `pb_bottom_graph`; a loop op placed in `pb_top_graph` then loops over
    that middle graph. The middle graph also receives the loop iterator and
    loop condition inputs (the condition is marked as an output as well).

    Args:
        pb_ir (_ir.Ir): The _ir level Ir.
        pb_top_graph (_ir.Graph): The _ir top level graph that will contain
            the loop op.
        pb_bottom_graph (_ir.Graph): The _ir user defined subgraph that will
            be called.

    Returns:
        Tuple[_ir.Graph, _ir.op.CallOp, _ir.op.LoopOp]: The created _ir-level
        middle graph, call op and loop op.
    """
    # The graph that gets repeated by the loop op.
    wrapper_name = f"{pb_bottom_graph.id.str()}__loop_wrapper"
    unique_id = pb_ir.createUniqueSubgraphId(wrapper_name)
    pb_middle_graph = pb_ir.createGraph(_ir.GraphId(unique_id))

    call_opid = _ir.OperatorIdentifier("ai.graphcore", "Call", 1,
                                       _ir.NumInputs(), 0)
    call_name = f"{pb_middle_graph.id.str()}__call__{pb_bottom_graph.id.str()}"

    context = get_current_context()

    # The middle graph calls the bottom graph.
    pb_callop = pb_middle_graph.createOp_CallOp(
        call_opid, pb_bottom_graph, context._get_op_settings(call_name))

    loop_opid = _ir.OperatorIdentifier("ai.onnx", "Loop", 11,
                                       _ir.NumInputs(), 0)
    loop_name = f"{pb_top_graph.id.str()}__loop__{pb_middle_graph.id.str()}"

    # The top graph loops over the middle graph.
    pb_loop_op = pb_top_graph.createOp_LoopOp(
        loop_opid, context._get_op_settings(loop_name), pb_middle_graph)

    # Mandatory loop iterator tensor: an input of the subgraph only.
    iterator_id = _ir.addScope(pb_middle_graph, "Iterator___")
    iterator_info = _ir.TensorInfo(_ir.DataType.INT32, ())
    pb_middle_graph.addInput(iterator_id, iterator_info)

    # Mandatory loop condition tensor: both an input and an output.
    condition_id = _ir.addScope(pb_middle_graph, "LoopCond___")
    condition_info = _ir.TensorInfo(_ir.DataType.BOOL, ())
    pb_middle_graph.addInput(condition_id, condition_info)
    pb_middle_graph.markAsOutput(condition_id)

    return pb_middle_graph, pb_callop, pb_loop_op
def gelu(t: Tensor) -> Tensor:
    """Compute the Gelu activation of a tensor.

    See https://arxiv.org/abs/1606.08415

    Args:
        t (Tensor): Input tensor. Must belong to the current graph.

    Returns:
        Tensor: Output tensor of the created Gelu op.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    op_settings = context._get_op_settings('gelu')
    op_id = _ir.OperatorIdentifier("ai.graphcore", "Gelu", 1,
                                   _ir.NumInputs(1, 1), 1)
    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id("gelu_out")}
    gelu_op = pb_graph.createConnectedOp_GeluOp(inputs, outputs, op_id,
                                                op_settings)

    return Tensor._from_pb_tensor(gelu_op.outTensor(0))
def softmax(t: Tensor, axis: int) -> Tensor:
    """Compute the softmax of a tensor along one axis.

    Rescales the slices along `axis` so that all elements are within [0, 1]
    and sum to 1. The output shape and dtype match the input.

    Args:
        t (Tensor): Tensor to be softmaxed. Must belong to the current graph.
        axis (int): The axis along which the softmax is computed. It is
            passed through `handle_negative_axis` before reaching the op.

    Returns:
        Tensor: The softmaxed tensor.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    op_settings = context._get_op_settings('softmax')
    op_id = _ir.OperatorIdentifier("ai.onnx", "Softmax", 11,
                                   _ir.NumInputs(1, 1), 1)
    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id("softmax_out")}
    resolved_axis = handle_negative_axis(t, axis)
    softmax_op = pb_graph.createConnectedOp_SoftmaxOp(
        inputs, outputs, op_id, resolved_axis, op_settings)

    return Tensor._from_pb_tensor(softmax_op.outTensor(0))
def transpose(t: Tensor,
              permutation: Optional[Tuple[int, ...]] = None) -> Tensor:
    """Permute the axes of a tensor.

    By default the axes of `t` are reversed.

    Args:
        t (Tensor): Tensor to be transposed. Must belong to the current graph.
        permutation (Optional[Tuple[int, ...]]): Tuple containing a
            permutation of [0, N-1] where N is the rank of input `t`. If not
            provided, the axes will be reversed.

    Returns:
        Tensor: The transposed tensor.
    """
    # Resolve the permutation before touching the context, as the original
    # ordering does.
    permutation = _handle_permuation(t, permutation)

    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    op_settings = context._get_op_settings('transpose')
    op_id = _ir.OperatorIdentifier("ai.onnx", "Transpose", 1,
                                   _ir.NumInputs(1, 1), 1)
    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id(f"{t.name}_T")}
    transpose_op = pb_graph.createConnectedOp_TransposeOp(
        inputs, outputs, op_id, permutation, op_settings)

    return Tensor._from_pb_tensor(transpose_op.outTensor(0))
def detach(t: Tensor) -> Tensor:
    """Prevent gradient computation of this tensor.

    Numerically equivalent to the identity op.

    Args:
        t (Tensor): Input tensor. Must belong to the current graph.

    Returns:
        Tensor: Output tensor of the created Detach op.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    op_settings = context._get_op_settings('detach')
    op_id = _ir.OperatorIdentifier("ai.graphcore", "Detach", 1,
                                   _ir.NumInputs(1, 1), 1)
    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id("detach_out")}
    detach_op = pb_graph.createConnectedOp_DetachOp(inputs, outputs, op_id,
                                                    op_settings)

    return Tensor._from_pb_tensor(detach_op.outTensor(0))
def test_accumulate_zero_op(connected: bool) -> None:
    """Test the AccumulatorZeroOp.

    Args:
        connected (bool): Whether to use the createConnected<opname> function
            or just create<opname>
    """
    _, graphs = create_ir()
    main_graph = graphs[0]
    n_inputs = _ir.NumInputs(2, 2)

    in_tensor = add_actgrad_tensor("input", [4], main_graph)
    updater_tensor = add_actgrad_tensor("updater", [4], main_graph)
    factor_tensor = add_actgrad_tensor("factor", [4], main_graph)
    out_tensor = add_actgrad_tensor("updated_weight", [4], main_graph)

    # Built but not handed to either creator below.
    opid = _ir.OperatorIdentifier("ai.graphcore", "AccumulatorZeroOp", 1,
                                  n_inputs, 1)
    op_settings = _ir.Settings(main_graph, "AccumulatorZeroOp")

    if connected:
        in_ids: Dict[int, str] = {
            0: in_tensor.id,
            1: updater_tensor.id,
            2: factor_tensor.id,
        }
        out_ids: Dict[int, str] = {0: out_tensor.id}
        main_graph.createConnectedOp_AccumulatorZeroOp(in_ids,
                                                       out_ids,
                                                       settings=op_settings)
    else:
        op = main_graph.createOp_AccumulatorZeroOp(settings=op_settings)
        op.connectInTensor(0, in_tensor.id)
        op.connectInTensor(1, updater_tensor.id)
        op.connectInTensor(2, factor_tensor.id)
        op.connectOutTensor(0, out_tensor.id)
        op.setup()
def host_store(d2h_stream: DeviceToHostStream, t: Tensor) -> None:
    """Host Store: an op representing a device-to-host data transfer.

    It uses the existing device to host transfers created when building the
    IR, but defers the actual poplar::Copy until the op itself runs. This
    allows the copy to be scheduled as part of the normal op scheduling.

    Args:
        d2h_stream (DeviceToHostStream): The stream to store into. Its stream
            tensor must be in the main graph, and its dtype and shape must
            match those of `t`.
        t (Tensor): The input tensor to copy to host. Must belong to the
            current graph.

    Raises:
        ValueError: If the dtype or the shape of `d2h_stream` differs from
            that of `t`.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)
    check_in_graph(graph.ir().main_graph(), d2h_stream._stream_tensor)

    dtypes_match = d2h_stream.dtype == t.dtype
    if not dtypes_match:
        raise ValueError(
            f'dtype of stream {d2h_stream.tensor_id()} `{d2h_stream.dtype}` does not match dtype of provided tensor `{t.dtype}`'
        )

    shapes_match = d2h_stream.shape == t.shape
    if not shapes_match:
        raise ValueError(
            f'shape of stream {d2h_stream.tensor_id()} `{d2h_stream.shape}` does not match shape of provided tensor `{t.shape}`'
        )

    op_id = _ir.OperatorIdentifier("ai.graphcore", "HostStore", 1,
                                   _ir.NumInputs(1), 0)
    inputs = {0: t.id}
    # The op has no outputs, hence the empty output map.
    pb_graph.createConnectedOp_HostStoreOp(
        inputs, {}, op_id, context._get_op_settings('host_store'),
        d2h_stream.tensor_id())
def slice(t: Tensor,
          start: Optional[Union[int, List[Optional[int]]]] = None,
          stop: Optional[Union[int, List[Optional[int]]]] = None,
          step: Optional[Union[int, List[Optional[int]]]] = None,
          axis: Optional[Union[int, List[int]]] = None) -> Tensor:
    """Select elements from a tensor using one or more slices.

    A slice specifies the start (inclusive) and stop (exclusive) index of
    elements to select. Multiple slices can be specified using a list of
    items for each parameter (start, stop, step). If step is `-1` the slice
    is performed backwards.

    If axis is not specified, each slice will correspond to axis 0 to N
    where N is the number of slices.

    When `start`, `stop` and `step` are all `None`, `t` itself is returned
    and no op is created.

    Examples:
    ```
    t == slice(t) == slice(t, axis=1)
    slice(t, start=1)           # Slice axis 0 from start index 1
    slice(t, start=[1,2]) == slice(t, start=[1,2], axis=[0,1])
    slice(t, stop=-2)           # Slice axis 0 upto second last element (exclusive)
    slice(t, stop=3, step=-1)   # Slice backwards from last element (inclusive) to third last element (exclusive)
    ```

    Args:
        t (Tensor): Tensor to slice. Must belong to the current graph.
        start: Index of first element (inclusive) or `None` which defaults
            to 0.
        stop: Index of last element (exclusive) or `None` which defaults to
            last element (inclusive) if step is forward or first element
            (inclusive) if step is backwards.
        step: `1` for forward or `-1` for backwards.
        axis: Axis of tensor to slice on or `None` will default to each axis
            sequentially.

    Returns:
        Tensor: output tensor
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    # Nothing to slice: hand the input straight back.
    nothing_requested = all(arg is None for arg in (start, stop, step))
    if nothing_requested:
        return t

    start, stop, step, axis = process_args(start, stop, step, axis)

    op_id = _ir.OperatorIdentifier("ai.onnx", "Slice", 11,
                                   _ir.NumInputs(1, 1), 1)
    op_settings = context._get_op_settings("slice")
    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id("slice_out")}
    slice_op = pb_graph.createConnectedOp_SliceOp(
        inputs,
        outputs,
        starts_=start,
        ends_=stop,
        axes_=axis,
        steps_=step,
        opid=op_id,
        settings=op_settings,
    )

    return Tensor._from_pb_tensor(slice_op.outTensor(0))
def cast(t: Tensor, data_type: dtype) -> Tensor:
    """Cast tensor `t` to data type `data_type`.

    Args:
        t (Tensor): Tensor to be cast. Must belong to the current graph.
        data_type (popart.ir.dtypes.dtype): Dtype to cast to.

    Returns:
        Tensor: Output tensor of the created Cast op.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    op_settings = context._get_op_settings('cast')
    op_id = _ir.OperatorIdentifier("ai.onnx", "Cast", 9, _ir.NumInputs(1, 1),
                                   1)
    inputs = {0: t.id}
    # The output id is derived from the input id and the target dtype name.
    outputs = {0: graph._create_tensor_id(f"{t.id}_{data_type._name}")}
    cast_op = pb_graph.createConnectedOp_CastOp(
        inputs,
        outputs,
        _to=data_type._pb_dtype,
        opid=op_id,
        settings=op_settings,
    )

    return Tensor._from_pb_tensor(cast_op.outTensor(0))
def test_accumulate_op(connected: bool) -> None:
    """Test the Accumulate Op.

    Args:
        connected (bool): Whether to use the createConnected<opname> function
            or just create<opname>
    """
    _, graphs = create_ir()
    main_graph = graphs[0]
    n_inputs = _ir.NumInputs(2, 2)

    weight_tensor = add_random_tensor("weight", _ir.TensorType.Variable, [4],
                                      main_graph)
    grad_tensor = add_actgrad_tensor("grad", [4], main_graph)
    out_tensor = add_actgrad_tensor("updated_weight", [4], main_graph)

    # Built but not handed to either creator below.
    opid = _ir.OperatorIdentifier("ai.graphcore", "Accumulate", 1, n_inputs,
                                  1)
    op_settings = _ir.Settings(main_graph, "accumulate")

    if connected:
        in_ids: Dict[int, str] = {0: weight_tensor.id, 1: grad_tensor.id}
        out_ids: Dict[int, str] = {0: out_tensor.id}
        main_graph.createConnectedOp_AccumulateOp(in_ids,
                                                  out_ids,
                                                  _ir.AccumulationType.Add,
                                                  _ir.OptimizerValue(0.5),
                                                  settings=op_settings)
    else:
        op = main_graph.createOp_AccumulateOp(_ir.AccumulationType.Add,
                                              _ir.OptimizerValue(0.5),
                                              settings=op_settings)
        op.connectInTensor(0, weight_tensor.id)
        op.connectInTensor(1, grad_tensor.id)
        op.connectOutTensor(0, out_tensor.id)
        op.setup()
def test_tiedgather_op(connected: bool) -> None:
    """Test the Tied Gather Op.

    Args:
        connected (bool): Whether to use the createConnected<opname> function
            or just create<opname>
    """
    _, graphs = create_ir()
    main_graph = graphs[0]
    # Built but not used by either creator below.
    n_inputs = _ir.NumInputs(2, 2)

    in_tensor = add_actgrad_tensor("in0", [8], main_graph, _ir.DataType.INT32)
    indices_tensor = add_random_tensor("indices", _ir.TensorType.Variable,
                                       [16, 4], main_graph)
    out_tensor = add_actgrad_tensor("out0", [8, 4], main_graph)
    op_settings = _ir.Settings(main_graph, "tiedgather")

    if connected:
        in_ids: Dict[int, str] = {0: in_tensor.id, 1: indices_tensor.id}
        out_ids: Dict[int, str] = {0: out_tensor.id}
        op = main_graph.createConnectedOp_TiedGatherOp(
            in_ids,
            out_ids,
            axis_=0,
            available_memory_proportion_=_ir.OptionalFloat(0.4),
            settings=op_settings)
    else:
        op = main_graph.createOp_TiedGatherOp(
            axis_=0,
            available_memory_proportion_=_ir.OptionalFloat(0.4),
            settings=op_settings)
        op.connectInTensor(0, in_tensor.id)
        op.connectInTensor(1, indices_tensor.id)
        op.connectOutTensor(0, out_tensor.id)

    # Both creation paths finish with an explicit setup call.
    op.setup()
def logical_not(t: Tensor) -> Tensor:
    """Compute element-wise the value of NOT t.

    The input is passed through `cast_if_needed` with `dtypes.bool` before
    the op is created.

    Args:
        t (Tensor): Input tensor. Must belong to the current graph.

    Returns:
        Tensor: Output tensor of the created Not op.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    bool_input = cast_if_needed(t, dtypes.bool)

    op_settings = context._get_op_settings('not')
    op_id = _ir.OperatorIdentifier("ai.onnx", "Not", 1, _ir.NumInputs(1, 1),
                                   1)
    inputs = {0: bool_input.id}
    outputs = {0: graph._create_tensor_id("not_out")}
    not_op = pb_graph.createConnectedOp_NotOp(inputs, outputs, op_id,
                                              op_settings)

    return Tensor._from_pb_tensor(not_op.outTensor(0))
def replicated_reduce_scatter(t: Tensor,
                              op: CollectiveOperator = CollectiveOperator.Add,
                              group: Optional[CommGroup] = None) -> Tensor:
    """Reduce tensor `t` across replicas.

    Each replica will only receive a unique slice of `t`.

    Args:
        t (Tensor): Tensor to be reduced. Inputs will be flattened.
        op (CollectiveOperator, optional): Operation to reduce with.
            Defaults to CollectiveOperator.Add.
        group (Optional[CommGroup], optional): Replicas to reduce across.
            When `None`, a default-constructed `CommGroup()` is used
            (all replicas).

    Returns:
        Tensor: A slice of the reduced tensor. Always a 1D tensor.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    comm_group = CommGroup() if group is None else group

    op_settings = context._get_op_settings('replicated_reduce_scatter')
    op_id = _ir.OperatorIdentifier("ai.graphcore", "ReplicatedReduceScatter",
                                   1, _ir.NumInputs(1, 1), 1)
    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id(t.name + "_reduce_scattered")}
    # `op` is the collective operator argument; the created op gets its own
    # name so the parameter is not rebound.
    scatter_op = pb_graph.createConnectedOp_ReplicatedReduceScatterOp(
        inputs, outputs, op_id, op, comm_group, op_settings)

    return Tensor._from_pb_tensor(scatter_op.outTensor(0))
def replicated_all_gather(t: Tensor,
                          group: Optional[CommGroup] = None) -> Tensor:
    """Gather tensor `t` across replicas.

    The output tensor contains the values of `t` from each replica.

    Args:
        t (Tensor): Tensor to be gathered. Must be rank=1.
        group (Optional[CommGroup], optional): Replicas to gather from.
            When `None`, a default-constructed `CommGroup()` is used
            (all replicas).

    Returns:
        Tensor: Gathered tensor.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    comm_group = CommGroup() if group is None else group

    op_settings = context._get_op_settings('replicated_all_gathered')
    op_id = _ir.OperatorIdentifier("ai.graphcore", "ReplicatedAllGather", 1,
                                   _ir.NumInputs(1, 1), 1)
    inputs = {0: t.id}
    outputs = {0: graph._create_tensor_id(t.name + "_all_gathered")}
    gather_op = pb_graph.createConnectedOp_ReplicatedAllGatherOp(
        inputs, outputs, op_id, comm_group, op_settings)

    return Tensor._from_pb_tensor(gather_op.outTensor(0))
def test_call_op(connected: bool):
    """Test the special case of the call op.

    Args:
        connected (bool): Whether to use the createConnected<opname> function
            or just create<opname>
    """
    # Main graph plus one subgraph named 'sub_graph'.
    _, graphs = create_ir(["sub_graph"])
    main_graph, called_graph = graphs[0], graphs[1]
    n_inputs = _ir.NumInputs(1, 1)

    in_tensor = add_actgrad_tensor("in0", [1, 2, 3], main_graph)
    out_tensor = add_actgrad_tensor("out0", [1, 2, 3], main_graph)

    called_graph.addInput("inputA", in_tensor.info)

    opid = _ir.OperatorIdentifier("ai.graphcore", "Call", 1, n_inputs, 1)
    op_settings = _ir.Settings(main_graph, "new_settings")

    if not connected:
        # Create the bare op and wire it up by hand.
        op = main_graph.createOp_CallOp(opid, called_graph, op_settings)
        op.connectInTensor(0, in_tensor.id)
        op.connectOutTensor(0, out_tensor.id)
        op.setup()
    else:
        in_ids: Dict[int, str] = {0: in_tensor.id}
        out_ids: Dict[int, str] = {0: out_tensor.id}
        op = main_graph.createConnectedOp_CallOp(in_ids, out_ids, opid,
                                                 called_graph, op_settings)

    assert op.getCalledGraphs()[0] == called_graph
    assert op.getCalledGraphIds()[0] == "sub_graph"
def init(shape: Iterable[int], dtype: dtypes.dtype,
         name: Optional[str] = None) -> Tensor:
    """Init Op: create a tensor with zero values.

    The op is created with `_ir.TensorType.ActGrad` and `_ir.InitType.Zero`;
    the returned tensor is not considered a variable.

    Args:
        shape (Iterable[int]): Shape of the output tensor.
        dtype (dtypes.dtype): Data type for the output tensor.
        name (Optional[str]): Name handed to the graph when creating the
            output tensor id. Defaults to None.

    Returns:
        Tensor: The output tensor of the created Init op.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    tensor_info = _ir.TensorInfo(dtype._pb_dtype, list(shape))
    op_id = _ir.OperatorIdentifier("ai.graphcore", "Init", 1,
                                   _ir.NumInputs(0), 1)

    # No inputs; a single output whose id is derived from `name`.
    outputs = {0: graph._create_tensor_id(name)}
    op_settings = context._get_op_settings('init')
    init_op = pb_graph.createConnectedOp_InitOp(
        {},
        outputs,
        op_id,
        tensor_info,
        _ir.TensorType.ActGrad,
        _ir.InitType.Zero,
        op_settings,
        -1,
    )

    return Tensor._from_pb_tensor(init_op.outTensor(0))
def test_init_op(init_type: "_ir.InitType", connected: bool):
    """Test the special case of the init op.

    Args:
        init_type (_ir.InitType): The initialisation type to use
            (zero/no init)
        connected (bool): Whether to use the createConnected<opname> function
            or just create<opname>
    """
    _, graphs = create_ir()
    graph = graphs[0]
    out_tensor = add_actgrad_tensor("out0", [1, 2, 3], graph)

    opid = _ir.OperatorIdentifier("ai.onnx", "Init", 1, _ir.NumInputs(0, 0),
                                  1)
    op_settings = _ir.Settings(graph, "new_settings")

    if not connected:
        # Create the bare op and attach its single output by hand.
        op = graph.createOp_InitOp(opid, out_tensor.info,
                                   out_tensor.tensorType(), init_type,
                                   op_settings, 0)
        op.connectOutTensor(0, out_tensor.id)
        op.setup()
    else:
        op = graph.createConnectedOp_InitOp({}, {0: out_tensor.id}, opid,
                                            out_tensor.info,
                                            out_tensor.tensorType(),
                                            init_type, op_settings, 0)

    # The init op has no inputs and exactly the one output created above.
    assert not op.hasInput(0)
    assert op.outTensor(0) == out_tensor
    assert op.hasOutput(0)
    assert op.outId(0) == out_tensor.id
def test_ipu_copy_op(source: int, destination: int, connected: bool) -> None:
    """Test the ipu copy op.

    Args:
        source (int): Source IPU
        destination (int): Destination IPU
        connected (bool): Whether to use the createConnected<opname> function
            or just create<opname>
    """
    _, graphs = create_ir()
    graph = graphs[0]
    in_tensor = add_actgrad_tensor("in0", [1, 2, 3], graph)

    opid = _ir.OperatorIdentifier("ai.graphcore", "IpuCopy", 1,
                                  _ir.NumInputs(0, 0), 1)
    op_settings = _ir.Settings(graph, "new_settings")

    if connected:
        op = graph.createConnectedOp_IpuCopyOp({0: in_tensor.id},
                                               {0: "outId"}, opid, source,
                                               destination, op_settings)
    else:
        op = graph.createOp_IpuCopyOp(opid, destination, op_settings)
        # For the bare op the source IPU is supplied when connecting the
        # input.
        op.connectInTensor(0, in_tensor.id, source)
        op.createAndConnectOutTensor(0, "outId")

    # Both creation paths finish with an explicit setup call.
    op.setup()

    assert op.inTensor(0) == in_tensor
    assert op.hasInput(0)
    assert op.hasOutput(0)
    assert op.outId(0) == "outId"
    assert op.getDestIpu() == destination
    assert op.getSourceIpu() == source
    assert op.getSourceIpu("in0") == source
    assert op.getMinSourceIpu() == source
    assert op.getMaxSourceIpu() == source
def test_op_attributes(attribute: str, shorthand: str, input_id: int):
    """Test the various attributes that can be applied to ops.

    Args:
        attribute (str): Name of the attribute
        shorthand (str): Shorthand of the attribute
            e.g. VirtualGraphId -> VGraphId
        input_id (int): Long int for the id to use for the attribute.
    """
    _, graphs = create_ir(["A"])
    graph = graphs[0]
    op_settings = _ir.Settings(graph, "new_settings")
    n_inputs = _ir.NumInputs(1, 1)
    opid = _ir.OperatorIdentifier("ai.onnx", "Identity", 1, n_inputs, 1)
    op = _ir.Op(opid, op_settings)

    # Look up the accessors generated for this attribute by name.
    get_value = getattr(op, f"get{attribute}")
    set_value = getattr(op, f"set{attribute}")
    has_value = getattr(op, f"has{attribute}")
    get_optional = getattr(op, f"getOptional{shorthand}")

    assert not has_value()

    optional_type = getattr(_ir, f"Optional{shorthand}")

    # Unset optional: the plain getter must raise.
    set_value(optional_type())
    assert get_optional() == optional_type()
    with pytest.raises(popart.popart_exception) as e_info:
        get_value()
    assert (e_info.value.args[0] == f"Cannot return {attribute} for Op")
    assert not has_value()

    # Optional set to `input_id`.
    set_value(optional_type(input_id))
    assert get_value() == input_id
    assert has_value()
    assert get_optional() == optional_type(input_id)
def test_shapes(shape1: List[int], shape2: List[int], expected: List[int],
                dtype: str):
    """Test the shapes and np broadcasting.

    Don't really need to test the broadcasting as that is tested at C++
    level. But try a few cases to be sure binding works correctly.

    Args:
        shape1 (List[int]): First tensor shape
        shape2 (List[int]): Second Tensor Shape
        expected (List[int]): Expected shape
        dtype (Str): Popart data type to use
    """
    ir, graphs = create_ir(["A"])
    graph = graphs[0]
    op_settings = _ir.Settings(graph, "new_settings")
    n_inputs = _ir.NumInputs(1, 1)
    opid = _ir.OperatorIdentifier("ai.onnx", "Identity", 1, n_inputs, 1)
    op = _ir.Op(opid, op_settings)

    # Overload taking plain shapes.
    broadcast_shape = op.prettyNpOut(shape1, shape2)
    assert broadcast_shape == list(expected)

    # Overload taking TensorInfo objects.
    info1 = _ir.TensorInfo(dtype, shape1)
    info2 = _ir.TensorInfo(dtype, shape2)
    broadcast_info = op.prettyNpOut(info1, info2)
    assert broadcast_info == _ir.TensorInfo(dtype, expected)
def mul(lhs: Tensor, rhs: Tensor) -> Tensor:
    """Multiply two tensors element-wise.

    Follows numpy broadcasting rules. Arguments must have the same dtype.

    Args:
        lhs (Tensor): Left-hand tensor to be multiplied.
        rhs (Tensor): Right-hand tensor to be multiplied.

    Returns:
        Tensor: The product of lhs and rhs.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, lhs, rhs)

    op_settings = context._get_op_settings('mul')
    op_id = _ir.OperatorIdentifier("ai.onnx", "Mul", 7, _ir.NumInputs(2, 2),
                                   1)
    inputs = {0: lhs.id, 1: rhs.id}
    outputs = {0: graph._create_tensor_id("mul_out")}
    mul_op = pb_graph.createConnectedOp_MulOp(inputs, outputs, op_id,
                                              op_settings)

    return Tensor._from_pb_tensor(mul_op.outTensor(0))
def equal(lhs: Tensor, rhs: Tensor) -> Tensor:
    """Compare two tensors element-wise with an equal operator.

    Follows numpy broadcasting rules.

    Args:
        lhs (Tensor): Left-hand tensor to be compared.
        rhs (Tensor): Right-hand tensor to be compared.

    Returns:
        Tensor: The value (lhs == rhs).
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, lhs, rhs)

    settings = ctx._get_op_settings('equal')
    opid = _ir.OperatorIdentifier("ai.onnx", "Equal", 7, _ir.NumInputs(2, 2),
                                  1)
    # Use the Equal op creator so it matches the "Equal" operator identifier;
    # this previously called the AndOp creator, which builds a logical AND
    # rather than an equality comparison.
    op = pb_g.createConnectedOp_EqualOp(
        {
            0: lhs.id,
            1: rhs.id
        },
        {
            0: g._create_tensor_id("equal_out"),
        },
        opid,
        settings,
    )

    return Tensor._from_pb_tensor(op.outTensor(0))
def sub(lhs: Tensor, rhs: Tensor) -> Tensor:
    """Subtract two tensors element-wise.

    Follows numpy broadcasting rules. Arguments must have the same dtype.

    Args:
        lhs (Tensor): Tensor to subtract from.
        rhs (Tensor): Tensor to be subtracted.

    Returns:
        Tensor: The value of (lhs - rhs).
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, lhs, rhs)

    op_settings = context._get_op_settings('sub')
    op_id = _ir.OperatorIdentifier("ai.onnx", "Sub", 7, _ir.NumInputs(2, 2),
                                   1)
    inputs = {0: lhs.id, 1: rhs.id}
    outputs = {0: graph._create_tensor_id("sub_out")}
    sub_op = pb_graph.createConnectedOp_SubtractOp(inputs, outputs, op_id,
                                                   op_settings)

    return Tensor._from_pb_tensor(sub_op.outTensor(0))
def dynamic_slice(t: Tensor, index: Tensor, axes: List[int],
                  sizes: List[int], no_overlap: bool) -> Tensor:
    """Return a cloned slice of the input tensor.

    The word "dynamic" refers to the fact that the index can be specified
    during runtime.

    A slice along an axis can be defined by the tuple
    ( start, stop, step ):
        start - will be equal to the index for the respective axis
        stop - will be equal to index + size for the respective axis
        step - will equal 1

    Limitations:
    Assuming we would like to slice A with dimension (4, 3)
    - Step other than 1 is not supported (i.e. t[::2,:] is not supported)
    - Negative slicing is not supported (i.e. t[:-1,:] is not supported)
    - stop greater than the size of the axis is not supported
      (i.e. t[:5,:] is not supported)

    Args:
        t (Tensor): Input tensor.
        index (Tensor): The indices to start the slice from.
        axes (List[int]): The axes to slice from.
        sizes (List[int]): The sizes of the slices for the specified axes.
            For example: if index = [1, 2], axes = [0, 3] and
            sizes = [2, 4], then by the start/stop rule above the tensor
            will be sliced t[1:3, :, :, 2:6].
        no_overlap (bool): If set to true, then correct gradient
            backpropagation is only guaranteed if each region in the output
            tensor has exactly one populator (operation that writes data to
            this region). There are no run-time or compile-time checks
            possible to ensure this.

    Returns:
        Tensor: A clone (i.e. not a view) of the sliced input tensor.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t, index)

    op_settings = context._get_op_settings('dynamicslice')
    op_id = _ir.OperatorIdentifier("ai.graphcore", "DynamicSlice", 1,
                                   _ir.NumInputs(2, 2), 1)
    inputs = {0: t.id, 1: index.id}
    outputs = {0: graph._create_tensor_id("dynamic_slice_out")}
    slice_op = pb_graph.createConnectedOp_DynamicSliceOp(
        inputs, outputs, op_id, axes, sizes, no_overlap, op_settings)

    return Tensor._from_pb_tensor(slice_op.outTensor(0))
def test_remote_load_op(connected: bool, use_offset: bool,
                        inplace: bool) -> None:
    """Test that the input and output tensors of remote load op are correct.

    Args:
        connected (bool): Whether to use the createConnected<opname> function
            or just create<opname>
        use_offset (bool): Whether or not to specify the optional offset
            Tensor
        inplace (bool): Whether or not to use the inplace version
    """
    _, graphs = create_ir()
    graph = graphs[0]
    in_tensor = add_actgrad_tensor("t", [1, 2, 3], graph)

    opid = _ir.OperatorIdentifier("ai.onnx", "Init", 1, _ir.NumInputs(0, 0),
                                  1)
    op_settings = _ir.Settings(graph, "new_settings")
    out_id = "out_id"
    offset_tensor = add_actgrad_tensor("offset", [1], graph)

    # Pick the plain or the inplace pair of creator functions.
    if inplace:
        create_bare = graph.createOp_RemoteLoadInplaceOp
        create_connected = graph.createConnectedOp_RemoteLoadInplaceOp
    else:
        create_bare = graph.createOp_RemoteLoadOp
        create_connected = graph.createConnectedOp_RemoteLoadOp

    # Input 0 is always the tensor; input 1 is the optional offset.
    in_ids = {0: in_tensor.id}
    if use_offset:
        in_ids[1] = offset_tensor.id

    if connected:
        op = create_connected(in_ids, {0: out_id}, opid, op_settings, 1)
    else:
        op = create_bare(opid, op_settings, 1)
        for in_index, tensor_id in in_ids.items():
            op.connectInTensor(in_index, tensor_id)
        op.createAndConnectOutTensor(0, out_id)

    op.setup()

    assert op.hasInput(0)
    assert op.inTensor(0) == in_tensor
    assert op.inId(0) == in_tensor.id

    if not use_offset:
        assert not op.hasInput(1)
    else:
        assert op.hasInput(1)
        assert op.inTensor(1) == offset_tensor
        assert op.inId(1) == offset_tensor.id

    assert op.hasOutput(0)
    assert op.outId(0) == out_id
def split(t: Tensor, splits: Union[int, List[int]],
          axis: int = 0) -> List[Tensor]:
    """Split a tensor on a given axis into a list of tensors.

    Args:
        t (Tensor): Tensor to be split. Must belong to the current graph.
        splits (Union[int, List[int]]): Either an int which specifies the
            number of splits or a list of ints specifying the length of each
            output tensor.
        axis (int): Which axis to split on. Defaults to 0. It is passed
            through `handle_negative_axis` first.

    Returns:
        List[Tensor]: A list of tensors, one per split.

    Raises:
        ValueError: If `splits` is an int that does not equally divide the
            length of the chosen axis.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)

    axis = handle_negative_axis(t, axis)

    if isinstance(splits, int):
        # Expand a split count into equally sized split lengths.
        axis_len = t.shape[axis]
        if axis_len % splits != 0:
            raise ValueError(
                f"Split {splits} does not equally divide tensor axis {axis} of length {axis_len}."
            )
        part_len = axis_len // splits
        splits = [part_len] * splits

    out_ids = {
        idx: graph._create_tensor_id(f"{t.name}_split_{idx}")
        for idx, _ in enumerate(splits)
    }

    op_settings = context._get_op_settings('split')
    op_id = _ir.OperatorIdentifier("ai.onnx", "Split", 2, _ir.NumInputs(1, 1),
                                   1)
    split_op = pb_graph.createConnectedOp_SplitOp(
        {0: t.id},
        out_ids,
        axis_=axis,
        split_=splits,
        opid=op_id,
        settings=op_settings,
    )

    # Output indices are 0..len(splits)-1, in the order they were created.
    return [Tensor._from_pb_tensor(split_op.outTensor(idx)) for idx in out_ids]
def remote_store(
        t: Tensor,
        offset: Optional[Tensor] = None,
        remote_buffer_handle: Optional[RemoteBufferHandle] = None) -> None:
    """Store the input tensor to a remote (off-chip) buffer.

    This Op is typically used when the user wants to store several different
    identically shaped tensors to the same remote buffer by specifying the
    offset (see below).

    Op instances with matching `remote_buffer_id` (specified in the
    `remote_buffer_handle`) will outline together, meaning that if multiple
    different tensors are to be stored under the same remote buffer ID, a
    different `offset` value has to be supplied for each tensor.

    The `remote_buffer_handle` handles the relationship between
    `remote_buffer_id`, shape and datatype as shape and datatype needs to be
    fixed for each `remote_buffer_id`.

    All `offset`s and `remote_buffer_id`s need to be >= 0.

    If `t` is of rank `x`, the remote buffer of a certain `remote_buffer_id`
    will be of rank `x+1`, where the new dimension (the row) will be of size
    `N`.

    See also: `remote_buffer_handle`, `remote_load`.

    Args:
        t (Tensor): Tensor to copy and store in the remote buffer.
        offset (Optional[Tensor], optional): Optional 0-rank Tensor.
            Specify the row in the remote buffer the inTensor will be
            written to. Defaults to None.
        remote_buffer_handle (Optional[RemoteBufferHandle], optional): The
            handle to the remote buffer. Defaults to None.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)
    has_offset = offset is not None
    if has_offset:
        check_in_graph(graph, offset)

    remote_buffer_handle = prepare_remote_buffer(t, remote_buffer_handle,
                                                 graph)

    op_settings = context._get_op_settings('remote_store')
    op_id = _ir.OperatorIdentifier("ai.graphcore", "RemoteStore", 1,
                                   _ir.NumInputs(1, 2), 0)

    # Input 1 (the offset) is only connected when an offset was given.
    inputs = {0: t.id}
    if has_offset:
        inputs[1] = offset.id

    # The op has no outputs, hence the empty output map.
    pb_graph.createConnectedOp_RemoteStoreOp(
        inputs, {}, op_id, op_settings, remote_buffer_handle.remote_buffer_id)
def remote_load(
        t: Tensor,
        offset: Optional[Tensor] = None,
        remote_buffer_handle: Optional[RemoteBufferHandle] = None) -> Tensor:
    """Load a tensor from remote (off-chip) buffer.

    The tensor will be loaded from the memory location corresponding to
    `remote_buffer_id` (specified in the `remote_buffer_handle`), and will be
    stored in the memory location corresponding to `t`.

    The relationship between `offset` and `remote_buffer_id` is thoroughly
    described in `remote_store`.

    See also: `remote_buffer_handle`, `remote_store`, `remote_load_`

    Args:
        t (Tensor): This tensor will be cloned, and the loaded data will be
            written to the clone.
        offset (Optional[Tensor], optional): Optional 0-rank Tensor.
            Specify the row in the remote buffer the inTensor will be loaded
            from. Defaults to None.
        remote_buffer_handle (Optional[RemoteBufferHandle], optional): The
            handle to the remote buffer. Defaults to None.

    Returns:
        Tensor: The tensor loaded from the remote buffer
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)
    has_offset = offset is not None
    if has_offset:
        check_in_graph(graph, offset)

    remote_buffer_handle = prepare_remote_buffer(t, remote_buffer_handle,
                                                 graph)

    op_settings = context._get_op_settings('remote_load')
    op_id = _ir.OperatorIdentifier("ai.graphcore", "RemoteLoad", 1,
                                   _ir.NumInputs(1, 2), 1)

    # Input 1 (the offset) is only connected when an offset was given.
    inputs = {0: t.id}
    if has_offset:
        inputs[1] = offset.id
    outputs = {0: graph._create_tensor_id("remote_load_out")}

    load_op = pb_graph.createConnectedOp_RemoteLoadOp(
        inputs, outputs, op_id, op_settings,
        remote_buffer_handle.remote_buffer_id)

    return Tensor._from_pb_tensor(load_op.outTensor(0))
def gather(t: Tensor,
           indices: Tensor,
           axis: int = 0,
           available_memory_proportion: Optional[float] = None) -> Tensor:
    """Select multiple elements from an array along a specified axis.

    The elements are given by `indices`. When `axis == 0`, it is equivalent
    to numpy "fancy indexing".

    Pseudo example:
    ```
    gather(x, [1, 2, 3]) == [x[1], x[2], x[3]]
    ```

    Args:
        t (Tensor): Input tensor.
        indices (Tensor): The indices of the elements to extract.
        axis (int): Which axis to gather on. Default is 0.
        available_memory_proportion (Optional[float]): The maximum
            proportion of available memory on each tile that this layer
            should consume temporarily during the course of the operation.
            Defaults to 1.0 if not set globally. The value is converted with
            `convert_optional_float` before being given to the op.

    Returns:
        Tensor: The gathered elements concatenated.
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)
    check_in_graph(graph, indices)

    memory_proportion = convert_optional_float(available_memory_proportion)

    op_id = _ir.OperatorIdentifier("ai.onnx", "Gather", 11,
                                   _ir.NumInputs(2, 2), 1)
    op_settings = context._get_op_settings("gather")
    inputs = {0: t.id, 1: indices.id}
    outputs = {0: graph._create_tensor_id("gather_out")}
    gather_op = pb_graph.createConnectedOp_GatherOp(
        inputs,
        outputs,
        opid=op_id,
        axis_=axis,
        available_memory_proportion_=memory_proportion,
        settings=op_settings)

    return Tensor._from_pb_tensor(gather_op.outTensor(0))
def remote_load_(
        t: Tensor,
        offset: Optional[Tensor] = None,
        remote_buffer_handle: Optional[RemoteBufferHandle] = None) -> Tensor:
    """Load a tensor from remote (off-chip) buffer inplace.

    This op is identical to `remote_load` with the exception of how `t` is
    handled. In `remote_load` `t` is cloned and the output is written to the
    clone, whereas in this version `t` is written to directly.

    See also: `remote_buffer_handle`, `remote_store`, `remote_load`

    Args:
        t (Tensor): The tensor the loaded data will be written to.
        offset (Optional[Tensor], optional): Optional 0-rank Tensor.
            Specify the row in the remote buffer the inTensor will be loaded
            from. Defaults to None.
        remote_buffer_handle (Optional[RemoteBufferHandle], optional): The
            handle to the remote buffer. Defaults to None.

    Returns:
        Tensor: The tensor loaded from the remote buffer
    """
    context = get_current_context()
    graph = context.graph
    pb_graph = graph._pb_graph

    check_in_graph(graph, t)
    has_offset = offset is not None
    if has_offset:
        check_in_graph(graph, offset)

    remote_buffer_handle = prepare_remote_buffer(t, remote_buffer_handle,
                                                 graph)

    op_settings = context._get_op_settings('remote_load_inplace')
    op_id = _ir.OperatorIdentifier("ai.graphcore", "RemoteLoadInplace", 1,
                                   _ir.NumInputs(1, 2), 1)

    # Input 1 (the offset) is only connected when an offset was given.
    inputs = {0: t.id}
    if has_offset:
        inputs[1] = offset.id
    outputs = {0: graph._create_tensor_id("remote_load_inplace_out")}

    load_op = pb_graph.createConnectedOp_RemoteLoadInplaceOp(
        inputs, outputs, op_id, op_settings,
        remote_buffer_handle.remote_buffer_id)

    return Tensor._from_pb_tensor(load_op.outTensor(0))