def create_callback_stepio(data, anchors, start_times, end_times,
                           batches_per_step):
    micro_batch_indices = defaultdict(int)

    # Input callback is called when the data is needed:
    def input_callback(id, is_prefetch: bool):
        if is_prefetch:
            input_time = time.perf_counter()
            start_times[id].append(input_time)
        return data[id][micro_batch_indices[id]]

    # Called after the input buffer has been consumed by the device:
    def input_complete_callback(id):
        micro_batch_indices[id] = \
            (micro_batch_indices[id] + 1) % batches_per_step
        return

    # Output callback is called when a buffer is needed for the result:
    def output_callback(id):
        return anchors[id][micro_batch_indices[id]]

    # Complete callback is called when the output buffer has
    # been filled (result is ready to be consumed by the host):
    def output_complete_callback(id):
        output_time = time.perf_counter()
        end_times[id].append(output_time)
        micro_batch_indices[id] = \
            (micro_batch_indices[id] + 1) % batches_per_step

    stepio = popart.PyStepIOCallback(input_callback,
                                     input_complete_callback,
                                     output_callback,
                                     output_complete_callback)
    return stepio
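
# A minimal usage sketch added for illustration (names like `session`, `data`
# and `run_with_callbacks` are assumptions, not part of the original source):
# it drives the create_callback_stepio factory above against a prepared
# popart.InferenceSession.
def run_with_callbacks(session, data, anchors, batches_per_step):
    # `data` maps each input tensor id to an array with one entry per micro
    # batch; `anchors` comes from session.initAnchorArrays().
    start_times = defaultdict(list)
    end_times = defaultdict(list)
    stepio = create_callback_stepio(data, anchors, start_times, end_times,
                                    batches_per_step)
    session.run(stepio)
    # Pairing the i-th recorded start of an input tensor with the i-th
    # recorded end of an output tensor gives a rough host-side round-trip
    # time per micro batch.
    return start_times, end_times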
def _test(transposedInput, transposedOutput):
    builder = popart.Builder()

    # Seed the RNG and add a 2x2 INT32 input tensor:
    np.random.seed(1)
    input1 = builder.addInputTensor("INT32", [2, 2])

    # Pass it through an identity op and anchor the result:
    output1 = builder.aiOnnx.identity([input1])
    builder.addOutputTensor(output1)

    anchorConfig = {output1: popart.AnchorReturnType("ALL")}
    dataFlow = popart.DataFlow(1, anchorConfig)
    deviceConfig = {'numIPUs': 1}
    dm = popart.DeviceManager()
    device = dm.createIpuModelDevice(deviceConfig)
    session = popart.InferenceSession(fnModel=builder.getModelProto(),
                                      dataFlow=dataFlow,
                                      deviceInfo=device)

    # Compile graph and place weights onto it
    session.prepareDevice()
    session.weightsFromHost()

    # Optionally make the input/output buffers non-contiguous by transposing
    # them, so feeding them through the callbacks should raise an error.
    input1Value = np.random.randint(0, 100, size=(2, 2), dtype='int32')
    if transposedInput:
        input1Value = np.transpose(input1Value, [1, 0])
    output1Value = np.random.randint(0, 100, size=(2, 2), dtype='int32')
    if transposedOutput:
        output1Value = np.transpose(output1Value, [1, 0])

    with pytest.raises(
        (Exception, RuntimeError, popart.popart_exception)) as e_info:

        def input_callback(id, prefetch):
            return input1Value

        def input_complete_callback(id):
            pass

        def output_callback(id):
            return output1Value

        def output_complete_callback(id):
            pass

        stepio = popart.PyStepIOCallback(input_callback,
                                         input_complete_callback,
                                         output_callback,
                                         output_complete_callback)

        session.run(stepio)

    assert "contiguous" in e_info.value.args[0]
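
# Illustrative pytest entry points added for this write-up (the test names
# are assumptions); they exercise _test above with a non-contiguous input
# buffer and a non-contiguous output buffer respectively.
def test_stepio_callback_transposed_input():
    _test(transposedInput=True, transposedOutput=False)


def test_stepio_callback_transposed_output():
    _test(transposedInput=False, transposedOutput=True)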
def create_callback_stepio(data: dict, anchors: dict,
                           start_times: DefaultDict[str, list],
                           end_times: DefaultDict[str, list],
                           batches_per_step: int, replication_factor: int):
    '''Create a popart.PyStepIOCallback using data and anchors.
    Will record timing information in start_times and end_times.'''
    input_callback_indices = defaultdict(int)

    # Input callback is called when the data is needed:
    def input_callback(tensor_id: str, is_prefetch: bool):
        input_time = time.perf_counter()
        start_times[tensor_id].append(input_time)
        idx = input_callback_indices[tensor_id]
        input_callback_indices[tensor_id] = \
            (idx + 1) % (batches_per_step * replication_factor)
        return data[tensor_id][idx]

    # Called after the input buffer has been consumed by the device:
    def input_complete_callback(tensor_id: str):
        return

    output_callback_indices = defaultdict(int)

    # Output callback is called when a buffer is needed for the result:
    def output_callback(tensor_id: str):
        idx = output_callback_indices[tensor_id]
        output_callback_indices[tensor_id] = \
            (idx + 1) % (batches_per_step * replication_factor)
        replica_idx = idx % replication_factor
        batch_idx = idx // replication_factor
        return anchors[tensor_id][batch_idx, replica_idx]

    # Complete callback is called when the output buffer has
    # been filled (result is ready to be consumed by the host):
    def output_complete_callback(tensor_id: str):
        output_time = time.perf_counter()
        end_times[tensor_id].append(output_time)

    stepio = popart.PyStepIOCallback(input_callback,
                                     input_complete_callback,
                                     output_callback,
                                     output_complete_callback)
    return stepio
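
# Standalone sketch added for illustration: it unrolls the same index
# arithmetic that output_callback above applies, showing how the flat
# callback index maps to a (micro batch, replica) coordinate in the anchor
# array. The helper name and default values are assumptions.
def show_output_index_layout(batches_per_step=3, replication_factor=2):
    for idx in range(batches_per_step * replication_factor):
        replica_idx = idx % replication_factor
        batch_idx = idx // replication_factor
        print(f"callback call {idx} -> "
              f"anchors[tensor_id][{batch_idx}, {replica_idx}]")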
def test_stepio_callbackinput(tmpdir):
    builder = popart.Builder()

    shape = popart.TensorInfo("FLOAT", [2])
    i1 = builder.addInputTensor(shape)
    i2 = builder.addInputTensor(shape)
    o = builder.aiOnnx.add([i1, i2])
    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    batches_per_step = 2

    dataFlow = popart.DataFlow(
        batches_per_step, {
            i1: popart.AnchorReturnType("All"),
            i2: popart.AnchorReturnType("All"),
            o: popart.AnchorReturnType("All")
        })

    session = popart.InferenceSession(fnModel=proto,
                                      dataFlow=dataFlow,
                                      deviceInfo=tu.create_test_device())

    session.prepareDevice()

    anchors = session.initAnchorArrays()

    i1_data = np.random.rand(batches_per_step, 2).astype(np.float32)
    i2_data = np.random.rand(batches_per_step, 2).astype(np.float32)

    inputs = {i1: i1_data, i2: i2_data}

    i1_c = 0
    i2_c = 0

    def input_callback(id, prefetch):
        nonlocal i1_c, i2_c
        time.sleep(2)
        print("input_callback ", id)
        t = inputs[id]
        print(t)

        if id == i1:
            print("input_callback ", id, len(t))
            if (i1_c < len(t)):
                result = t[i1_c]
                i1_c = i1_c + 1

        if id == i2:
            print("input_callback ", id, len(t))
            if (i2_c < len(t)):
                result = t[i2_c]
                i2_c = i2_c + 1

        print(result)
        return result

    def input_complete_callback(id):
        print("input_complete_callback ", id)

    i1_d = 0
    i2_d = 0
    o_d = 0

    def output_callback(id):
        nonlocal i1_d, i2_d, o_d
        time.sleep(2)
        print("output_callback ", id)
        t = anchors[id]

        if id == i1:
            result = t[i1_d]
            i1_d = i1_d + 1

        if id == i2:
            result = t[i2_d]
            i2_d = i2_d + 1

        if id == o:
            result = t[o_d]
            o_d = o_d + 1

        return result

    def output_complete_callback(id):
        print("output_complete_callback ", id)

    stepio = popart.PyStepIOCallback(input_callback, input_complete_callback,
                                     output_callback,
                                     output_complete_callback)

    session.run(stepio)

    # Confirm that device-to-host streaming of the input tensors returns them
    # unchanged, and that the output matches the expected elementwise sum.
    assert (np.allclose(anchors[i1], i1_data))
    assert (np.allclose(anchors[i2], i2_data))
    expected_result = i1_data + i2_data
    assert (np.allclose(anchors[o], expected_result))
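
# For comparison, a sketch added for illustration (the helper name is an
# assumption): the same session could be driven with the dictionary-based
# popart.PyStepIO, which indexes the numpy buffers itself instead of asking
# the host through callbacks.
def _run_with_dict_stepio(session, inputs, anchors):
    stepio = popart.PyStepIO(inputs, anchors)
    session.run(stepio)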
def build_and_run_graph(data_size):
    # Create a builder object:
    builder = popart.Builder()

    # Specify two input vectors:
    data_spec = popart.TensorInfo("FLOAT", [data_size])
    id_a = builder.addInputTensor(data_spec)
    id_b = builder.addInputTensor(data_spec)

    # Describe the computation:
    o1 = builder.aiOnnx.add([id_a, id_b])
    o2 = builder.aiOnnx.mul([id_a, id_b])

    # Designate the two output vectors and how
    # often the result will be required:
    builder.addOutputTensor(o1)
    builder.addOutputTensor(o2)
    dataFlow = popart.DataFlow(
        1, {o1: popart.AnchorReturnType("ALL"),
            o2: popart.AnchorReturnType("ALL")})

    # Set up an inference session:
    proto = builder.getModelProto()
    session = popart.InferenceSession(
        fnModel=proto,
        dataFlow=dataFlow,
        deviceInfo=popart.DeviceManager().createIpuModelDevice({}))

    # Compile graph:
    session.prepareDevice()

    # Create input data buffers:
    data_a = np.random.rand(data_size).astype(np.float32)
    data_b = np.random.rand(data_size).astype(np.float32)
    inputs = {id_a: data_a, id_b: data_b}

    # Create output data buffers:
    anchors = session.initAnchorArrays()

    # Create timer objects and dictionaries:
    timer = PerfIntervalTimer()
    rtts = {}

    # Input callback is called when the data is needed:
    def input_callback(id, is_prefetch: bool):
        if is_prefetch:
            return
        if timer.not_set():
            timer.reset()
        return inputs[id]

    # Called after the input buffer has been consumed:
    def input_complete_callback(id):
        return

    # Output callback is called when a buffer is needed for the result:
    def output_callback(id):
        return anchors[id]

    # Complete callback is called when the output buffer has
    # been filled (result is ready to be consumed by the host):
    def output_complete_callback(id):
        rtt = timer.interval()
        rtts[id] = rtt

    # Create the callback IO system:
    stepio = popart.PyStepIOCallback(input_callback,
                                     input_complete_callback,
                                     output_callback,
                                     output_complete_callback)

    # Run the graph and return timings:
    session.run(stepio)
    return rtts
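
# PerfIntervalTimer is used by build_and_run_graph above but not defined in
# this section. The class below is a minimal sketch matching the interface
# the callbacks rely on (not_set() / reset() / interval()); the original
# helper may be implemented differently.
class PerfIntervalTimer:
    def __init__(self):
        self._start = None

    def not_set(self):
        # True until reset() has recorded a start time.
        return self._start is None

    def reset(self):
        self._start = time.perf_counter()

    def interval(self):
        # Seconds elapsed since the last reset().
        return time.perf_counter() - self._start


if __name__ == "__main__":
    # Illustrative invocation with an assumed vector size.
    round_trip_times = build_and_run_graph(1024)
    for tensor_id, rtt in round_trip_times.items():
        print(f"{tensor_id}: {rtt * 1e6:.1f} us")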