def test_repeat_error(self, repeat_count: int):
    """Test that an error is thrown with an incorrect repeat_count.

    Args:
        repeat_count (int): Number of times to repeat.
    """
    ir = pir.Ir()
    main = ir.main_graph()
    with main:
        h2d = pir.h2d_stream((2, 16), pir.dtypes.float32)
        x = ops.host_load(h2d, "x")

        W = pir.variable(np.random.normal(0, 0.1, (16, 16)), name="W")
        b = pir.variable(np.zeros(16), name="b")

        linear = Linear()
        linear_graph = ir.create_graph(linear, x, out_features=16)

        with pytest.raises(ValueError) as e_info:
            y = ops.repeat(linear_graph,
                           repeat_count,
                           x,
                           subgraph_in_to_parent_in={
                               linear.W: W,
                               linear.b: b
                           })
        assert e_info.value.args[0].startswith(
            "Repeat trip count for repeat of")
def build_model_with_dot_checkpoints(ir: pir.Ir) -> None:
    """Make a model with 2 dot_checkpoints.

    Args:
        ir (pir.Ir): The ir to write to. The graph is built in place; nothing
            is returned.
    """
    main = ir.main_graph()
    with main:
        a_h2d = pir.h2d_stream(_TENSOR_SHAPE, pir.float32, name="a_stream")
        a = ops.host_load(a_h2d, "a")

        b = pir.variable(np.random.rand(*_TENSOR_SHAPE).astype(np.float32),
                         name="b")
        c = ops.add(a, b)
        ir.dot_checkpoint("Foo")

        d = pir.variable(np.random.rand(*_TENSOR_SHAPE).astype(np.float32),
                         name="d")
        e = ops.mul(c, d)
        ir.dot_checkpoint("Bar")

        f = ops.gelu(e)
        f_d2h = pir.d2h_stream(_TENSOR_SHAPE, pir.float32, name="f_stream")
        ops.host_store(f_d2h, f)
def build(self, x: pir.Tensor, out_features: int,
          bias: bool = True) -> Tuple[pir.Tensor, ...]:
    # W and b are exposed as subgraph inputs so that call sites can bind them
    # to parent-graph variables via `subgraph_in_to_parent_in`. No host IO
    # happens inside the subgraph; `x` is passed in by the caller.
    self.W = pir.subgraph_input((x.shape[-1], out_features), pir.float32, "W")
    y = x @ self.W
    if bias:
        self.b = pir.subgraph_input((out_features, ), pir.float32, "b")
        y = y + self.b
    return y, self.W, self.b
def test_fn(self):
    ir = pir.Ir()
    g = ir.main_graph()

    with g:
        h2d = pir.h2d_stream((), pir.dtypes.float32)
        x = ops.host_load(h2d, "x")

    assert len(g.get_tensors()) == 3
    assert len(g.get_variables()) == 0
    assert contains_op_of_type("HostLoad", _ir.op.exchange.HostLoadOp, g)
    assert contains_op_of_type("Init", _ir.op.InitOp, g)
def test_random_seed_setup():
    ir = pir.Ir()
    main = ir.main_graph()
    with main:
        seed_h2d = pir.h2d_stream(shape=(2, ),
                                  dtype=dtypes.uint32,
                                  name='seed_stream')
        seed = ops.host_load(seed_h2d, 'seed')

        x = pir.variable(0.0)
        x = ops.dropout(x, seed + 1, p=0.1)
        y = ops.dropout(x, seed + 2, p=0.7)

        y_d2h = pir.d2h_stream(y.shape, y.dtype, name="y_stream")
        ops.host_store(y_d2h, y)

    replicas = 4
    parent_seed = 1984
    seed_tensors = pir.create_seeds(parent_seed, replicas=replicas)

    ## Run the program
    ir = ir._pb_ir  # Internal ir
    y_id = y_d2h.tensor_id()

    dataFlow = popart.DataFlow(
        batchesPerStep=1,
        anchorTensors={y_id: popart.AnchorReturnType("All")})
    ir.setDataFlow(dataFlow)

    opts = ir.getSessionOptions()
    opts.useHostCopyOps = True
    opts.enableExplicitMainLoops = True
    opts.aliasZeroCopy = True
    opts.explicitRecomputation = True
    opts.enableReplicatedGraphs = True
    opts.replicatedGraphCount = replicas

    ir.updateVertices()

    device = popart.DeviceManager().createIpuModelDevice({"numIPUs": replicas})
    session = popart.InferenceSession.fromIr(ir=ir, deviceInfo=device)
    session.prepareDevice()

    # Create buffers for anchors
    anchors = session.initAnchorArrays()

    # Run the model
    stepio = popart.PyStepIO(inputs={seed_h2d.tensor_id(): seed_tensors},
                             outputs=anchors)
    session.weightsFromHost()
    session.run(stepio)
def test_copy_to(self):
    ir = pir.Ir()
    g = ir.main_graph()

    with g:
        h2d = pir.h2d_stream((), pir.dtypes.float32)
        x = ops.host_load(h2d, "x")
        x_io = ops.io_tile_copy(x)

    g_ops = g._pb_graph.getOps()
    assert len(g_ops) == 3
    io_copy = g_ops[-1]
    assert isinstance(io_copy, _ir.op.IoTileCopyOp)
    assert io_copy.getSettings().tileSet == _ir.TileSet.IO
def test_repeat_subgraph(self, repeat_count: int):
    """Do:
        host load x
        x = (x @ W) + b  <-- repeated `repeat_count` times.
        host store x

    Args:
        repeat_count (int): How many times to repeat.
    """
    ir = pir.Ir()
    main = ir.main_graph()
    with main:
        x_h2d = pir.h2d_stream((16, 16), pir.float32, name="x_stream")
        x = ops.host_load(x_h2d, "x")

        W_data = np.random.normal(0, 1, (16, 16)).astype(np.float32)
        W = pir.variable(W_data, name="W")
        b_data = np.random.normal(0, 0.4, (16)).astype(np.float32)
        b = pir.variable(b_data, name="b")

        linear = Linear()
        linear_graph = ir.create_graph(linear, x, out_features=16)

        y, W, b = ops.repeat(linear_graph,
                             repeat_count,
                             x,
                             subgraph_in_to_parent_in={
                                 linear.W: W,
                                 linear.b: b
                             })

        y_d2h = pir.d2h_stream((16, 16), pir.float32, name="y_stream")
        ops.host_store(y_d2h, y)

    data = np.random.random((16, 16)).astype(np.float32)
    r_y = run_ir(ir,
                 bps=1,
                 y_id=y_d2h.tensor_id(),
                 inputs={x_h2d.tensor_id(): data})

    out = data
    for _ in range(repeat_count):
        out = np.matmul(out, W_data, dtype=np.float32) + b_data

    assert r_y.shape == out.shape
    # Multiple matmuls mean the values get pretty large and fp differences are
    # common. Hence the large rtol and atol.
    assert np.allclose(r_y, out, rtol=1e-04, atol=1e-03)
def build_model(
        weights_data: np.ndarray, input_shape: Tuple[int, ...]
) -> Tuple[_ir.Ir, HostToDeviceStream, DeviceToHostStream]:
    """Build the model using the popart.ir API.

    Args:
        weights_data (np.ndarray): The (non-streamed) data of the weights.
        input_shape (tuple): The shape of the streamed input tensor.

    Returns:
        (tuple): tuple containing:

            ir._pb_ir (_ir.Ir): The underlying IR
            t_in_h2d (HostToDeviceStream): The input stream of t_in
            t_out_d2h (DeviceToHostStream): The output stream of t_out
    """
    ir = pir.Ir()
    main = ir.main_graph()
    with main:
        weights = pir.variable(weights_data, name="weights")

        # Load t_in from host
        t_in_h2d = pir.h2d_stream(input_shape, pir.float32, name="t_in_stream")

        # Operations on IPU 0
        with pir.virtual_graph(0):
            t_in = ops.host_load(t_in_h2d, "t_in")
            t_1 = ops.matmul(t_in, weights)
            # Copy to IPU 1
            t_1_c = ops.ipu_copy(t_1, 1)

        # Operations on IPU 1
        with pir.virtual_graph(1):
            t_2 = ops.gelu(t_1_c)
            # Copy to IPU 2
            t_2_c = ops.ipu_copy(t_2, 2)

        # Operations on IPU 2
        with pir.virtual_graph(2):
            t_out = ops.softmax(t_2_c, axis=1)
            t_out_d2h = pir.d2h_stream(t_out.shape,
                                       pir.float32,
                                       name="t_out_stream")
            ops.host_store(t_out_d2h, t_out)

    return ir._pb_ir, t_in_h2d, t_out_d2h
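# The sketch below is not part of the original source: it is a hedged example
# of how the IR returned by `build_model` might be compiled and run, mirroring
# the session setup used by the other snippets here. The helper name, the
# placeholder shapes, and the `virtualGraphMode` setting are assumptions.
def _run_build_model_sketch():
    weights_data = np.random.rand(4, 4).astype(np.float32)
    pb_ir, t_in_h2d, t_out_d2h = build_model(weights_data, input_shape=(2, 4))

    dataFlow = popart.DataFlow(
        batchesPerStep=1,
        anchorTensors={t_out_d2h.tensor_id(): popart.AnchorReturnType("All")})
    pb_ir.setDataFlow(dataFlow)

    opts = pb_ir.getSessionOptions()
    opts.useHostCopyOps = True
    opts.enableExplicitMainLoops = True
    opts.aliasZeroCopy = True
    opts.explicitRecomputation = True
    # Ops are manually placed on virtual graphs 0-2 above (assumed setting).
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual
    pb_ir.updateVertices()

    # The model spans virtual graphs 0-2, so it needs (at least) 3 IPUs.
    device = popart.DeviceManager().createIpuModelDevice({"numIPUs": 3})
    session = popart.InferenceSession.fromIr(ir=pb_ir, deviceInfo=device)
    session.prepareDevice()

    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO(
        inputs={t_in_h2d.tensor_id(): np.random.rand(2, 4).astype(np.float32)},
        outputs=anchors)
    session.weightsFromHost()
    session.run(stepio)
    return anchors[t_out_d2h.tensor_id()]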
def test_subgraph():
    class ScaleNShift(pir.Module):
        def __init__(self):
            self.W: pir.Tensor = None
            self.b: pir.Tensor = None

        def build(self, x: pir.Tensor, out_features: int,
                  bias: bool = True) -> pir.Tensor:
            self.W = pir.subgraph_input((x.shape[-1], out_features),
                                        pir.float32, "W")
            y = ops.mul(x, self.W)
            if bias:
                self.b = pir.subgraph_input((out_features, ), pir.float32, "b")
                y = y + self.b
            return y

    ir = pir.Ir()
    main = ir.main_graph()
    with main:
        h2d = pir.h2d_stream((16, 16), pir.dtypes.float32)
        x = ops.host_load(h2d, "x")

        W = pir.variable(np.random.normal(0, 0.1, (16, 16)), name="W")
        b = pir.variable(np.zeros(16), name="b", dtype=pir.dtypes.float32)

        ss = ScaleNShift()
        ss_graph = ir.create_graph(ss, x, out_features=16)

        call_info = ops.call_with_info(ss_graph,
                                       x,
                                       subgraph_in_to_parent_in={
                                           ss.W: W,
                                           ss.b: b
                                       })
        y = call_info.get_output_tensors()[0]

        d2h = pir.d2h_stream(y.shape, y.dtype)
        ops.host_store(d2h, y)

    assert len(ss_graph.get_input_tensors()) == 3
    assert len(ss_graph.get_output_tensors()) == 1

    ss_bwd_info = pir.transforms.autodiff.autodiff(ss_graph)

    # Check an additional output has been added to the fwd graph.
    assert len(ss_graph.get_output_tensors()) == 2

    bwd_graph = ss_bwd_info.graph
    assert isinstance(bwd_graph, pir.Graph)

    assert len(ss_bwd_info.expected_inputs) == len(
        bwd_graph.get_input_tensors())
    assert len(ss_bwd_info.expected_outputs) == len(
        bwd_graph.get_output_tensors())

    grad_ops = (_ir.op.SumOp, _ir.op.MulArg0GradOp, _ir.op.MulArg1GradOp,
                _ir.op.AddArg0GradOp, _ir.op.AddArg1GradOp)
    for op in bwd_graph._pb_graph.getOps():
        assert isinstance(op, grad_ops)

    with main:
        grad_seed = pir.constant(np.ones((16, 16), np.float32))
        activations = pir.transforms.autodiff.get_expected_forward_inputs_from_call(
            call_info, ss_bwd_info)
        grads = ops.call(bwd_graph,
                         grad_seed,
                         subgraph_in_to_parent_in=activations)

    assert len(grads) == len(ss_bwd_info.expected_outputs)
def build_model(
) -> Tuple[_ir.Ir, pir.HostToDeviceStream, pir.DeviceToHostStream,
           pir.DeviceToHostStream, pir.DeviceToHostStream,
           pir.DeviceToHostStream, np.ndarray, np.ndarray]:
    ir = pir.Ir()
    main = ir.main_graph()
    with main:
        x_h2d = pir.h2d_stream(_IN_SHAPE, pir.float32, name="x_stream")
        x = ops.host_load(x_h2d, "x")

        W_data = np.random.normal(0, 0.1, _WEIGHT_SHAPE).astype(np.float32)
        b_data = np.zeros(_BIAS_SHAPE, dtype=np.float32)
        W = pir.variable(W_data, name="W")
        b = pir.variable(b_data, name="b")

        lin = Linear()
        lin_graph = ir.create_graph(lin, x, out_features=_OUT_FEATURES)

        lin_call_info = ops.call_with_info(lin_graph,
                                           x,
                                           subgraph_in_to_parent_in={
                                               lin.W: W,
                                               lin.b: b
                                           })
        y = lin_call_info.get_output_tensors()[0]
        assert y.shape == _OUT_SHAPE

        y_d2h = pir.d2h_stream(y.shape, y.dtype, name="y_stream")
        ops.host_store(y_d2h, y)

    lin_bwd_info = pir.transforms.autodiff.autodiff(lin_graph)
    lin_bwd_graph = lin_bwd_info.graph

    with main:
        grad_seed = pir.constant(np.ones(_OUT_SHAPE, np.float32))
        tensors_required_for_bwd = pir.transforms.autodiff.get_expected_forward_inputs_from_call(
            lin_call_info, lin_bwd_info)
        lin_bwd_call_info = ops.call_with_info(
            lin_bwd_graph,
            grad_seed,
            subgraph_in_to_parent_in=tensors_required_for_bwd)

    ##### Extract parent graph x_grad, W_grad, b_grad

    expected_outputs = lin_bwd_info.expected_outputs
    x_grad, W_grad, b_grad = None, None, None

    sg_x = lin_call_info.op_in_to_subgraph_in_tensor(x)
    sg_W = lin_call_info.op_in_to_subgraph_in_tensor(W)
    sg_b = lin_call_info.op_in_to_subgraph_in_tensor(b)

    def get_grad_tensor_in_main_graph_from_fwdgrad_expected_connection(
            ec: pir.transforms.autodiff.ExpectedConnection) -> pir.Tensor:
        # If (t, FwdGrad) appears at index i in expected_outputs, it is
        # guaranteed that t' (the grad of t) appears at output index i in the
        # grad graph.
        sg_out_idx = expected_outputs.index(ec)
        op_out_idx = lin_bwd_call_info.subgraph_in_to_op_in_index(sg_out_idx)
        parent_grad = lin_bwd_call_info.get_op_output_tensor(op_out_idx)
        return parent_grad

    for ec in expected_outputs:
        # Should always be the case for expected_outputs
        assert ec.connection_type == pir.transforms.autodiff.ExpectedConnectionType.FwdGrad

        sg_fwd_tensor = ec.fwd_tensor

        if sg_fwd_tensor == sg_x:
            x_grad = get_grad_tensor_in_main_graph_from_fwdgrad_expected_connection(
                ec)
        elif sg_fwd_tensor == sg_W:
            W_grad = get_grad_tensor_in_main_graph_from_fwdgrad_expected_connection(
                ec)
        elif sg_fwd_tensor == sg_b:
            b_grad = get_grad_tensor_in_main_graph_from_fwdgrad_expected_connection(
                ec)

    assert x_grad is not None
    assert W_grad is not None
    assert b_grad is not None

    # HostStore grads and collect d2h streams
    def host_store_and_return_d2h_stream(
            grad: pir.Tensor) -> pir.DeviceToHostStream:
        with main:
            d2h = pir.d2h_stream(grad.shape,
                                 grad.dtype,
                                 name=grad.name + "_stream")
            ops.host_store(d2h, grad)
        return d2h

    x_grad_d2h = host_store_and_return_d2h_stream(x_grad)
    W_grad_d2h = host_store_and_return_d2h_stream(W_grad)
    b_grad_d2h = host_store_and_return_d2h_stream(b_grad)

    assert x_grad_d2h is not None
    assert W_grad_d2h is not None
    assert b_grad_d2h is not None

    return ir._pb_ir, x_h2d, y_d2h, x_grad_d2h, W_grad_d2h, b_grad_d2h, W_data, b_data
def build_and_run(cache_path):
    ir = pir.Ir()
    main = ir.main_graph()
    with main:
        # Test host to device
        x_h2d = pir.h2d_stream((1, ), pir.float32, name='x_stream')
        x = ops.host_load(x_h2d, 'x')

        # Test variable
        y = pir.variable(4.0, name='y')
        z = ops.add(x, y)

        # Test random op
        seed_h2d = pir.h2d_stream(shape=(2, ),
                                  dtype=dtypes.uint32,
                                  name='seed_stream')
        seed = ops.host_load(seed_h2d, 'seed')
        r = ops.random_normal(seed, (1, ))
        z = ops.add(z, r)

        # Test device to host
        z_d2h = pir.d2h_stream(z.shape, z.dtype, name="z_stream")
        ops.host_store(z_d2h, z)

    # Create seed
    parent_seed = 1984
    seed_tensors = pir.create_seeds(parent_seed, batches_per_step=1)

    ## Run the program
    ir = ir._pb_ir  # Internal ir

    dataFlow = popart.DataFlow(
        batchesPerStep=1,
        anchorTensors={z_d2h.tensor_id(): popart.AnchorReturnType("All")})
    ir.setDataFlow(dataFlow)
    ir.updateVertices()

    opts = ir.getSessionOptions()
    opts.useHostCopyOps = True
    opts.enableExplicitMainLoops = True
    opts.aliasZeroCopy = True
    opts.explicitRecomputation = True

    # Enable engine caching
    opts.enableEngineCaching = True
    opts.cachePath = cache_path

    device = tu.create_test_device()
    session = popart.InferenceSession.fromIr(ir=ir, deviceInfo=device)
    session.prepareDevice()

    # Create buffers for anchors
    anchors = session.initAnchorArrays()

    inputs = {
        x_h2d.tensor_id(): np.array(3.0, dtype='float32'),
        seed_h2d.tensor_id(): seed_tensors,
    }

    # Run the model
    stepio = popart.PyStepIO(inputs=inputs, outputs=anchors)
    session.weightsFromHost()
    session.run(stepio)

    output = anchors['z_stream']
    device.detach()
    return output
# Copyright (c) 2021 Graphcore Ltd. All rights reserved.
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops
import popart

# Creating a model with popart.ir
ir = pir.Ir()
main = ir.main_graph()
with main:
    # host load
    input0 = pir.h2d_stream([1], pir.float32, name="input0_stream")
    a = ops.host_load(input0, "a")
    input1 = pir.h2d_stream([1], pir.float32, name="input1_stream")
    b = ops.host_load(input1, "b")

    # addition
    o = ops.add(a, b)

    # host store
    o_d2h = pir.d2h_stream(o.shape, o.dtype, name="output_stream")
    ops.host_store(o_d2h, o)

dataFlow = popart.DataFlow(
    batchesPerStep=1,
    anchorTensors={o_d2h.tensor_id(): popart.AnchorReturnType("All")})

ir = ir._pb_ir
ir.setDataFlow(dataFlow)

opts = ir.getSessionOptions()
opts.useHostCopyOps = True
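# The example stops after setting `useHostCopyOps`. What follows is an assumed
# completion (a sketch, not the original source) showing how such an IR is
# typically compiled and run; it reuses only session calls that appear in the
# other snippets above, and the input values are illustrative.
opts.enableExplicitMainLoops = True
opts.aliasZeroCopy = True
opts.explicitRecomputation = True
ir.updateVertices()

device = popart.DeviceManager().createIpuModelDevice({"numIPUs": 1})
session = popart.InferenceSession.fromIr(ir=ir, deviceInfo=device)
session.prepareDevice()

# Create buffers for anchors
anchors = session.initAnchorArrays()

# Run the model with some example input data
inputs = {
    input0.tensor_id(): np.array([1.0], dtype=np.float32),
    input1.tensor_id(): np.array([2.0], dtype=np.float32),
}
stepio = popart.PyStepIO(inputs=inputs, outputs=anchors)
session.weightsFromHost()
session.run(stepio)

print("Result:", anchors[o_d2h.tensor_id()])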