def body(img):
    with scopes.ipu_scope('/device:IPU:0'):
        if mode == 'sharded':
            # Import the frozen inference graph inside the autosharding
            # context so its ops can be split across the available IPUs.
            with autoshard.ipu_autoshard():
                probs = tf.import_graph_def(
                    network.optimized_graph,
                    input_map={network.graph_input: img},
                    name="optimized",
                    return_elements=[network.graph_output])[0]
            autoshard.automatic_sharding(num_shards=num_ipus,
                                         input_ts=img,
                                         loss_ts=probs,
                                         frozen_inference=True)
            outfeed_op = outfeed_queue.enqueue(probs)
            # Copy the sharding attribute from the probabilities op onto the
            # enqueue op so the outfeed is placed on the same shard.
            outfeed_op._set_attr(
                sharding._XLA_SHARDING,
                attr_value_pb2.AttrValue(
                    s=probs.op.get_attr('_XlaSharding')))
        else:
            probs = tf.import_graph_def(
                network.optimized_graph,
                input_map={network.graph_input: img},
                name="optimized",
                return_elements=[network.graph_output])[0]
            outfeed_op = outfeed_queue.enqueue(probs)
        # Note that enqueue happens on the IPU.
        return outfeed_op
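The body function above is an excerpt: network, mode, num_ipus and outfeed_queue are defined by the enclosing script. The sketch below shows one way such a body is typically driven, assuming a TF1-style graph-mode session and an SDK release whose infeed/outfeed queues still take a feed_name argument and whose configuration entry points are utils.create_ipu_config and utils.auto_select_ipus; the dataset shape, batches_per_step and the feed names are placeholder choices, not values from the original script.

import tensorflow.compat.v1 as tf
from tensorflow.python.ipu import (ipu_compiler, ipu_infeed_queue,
                                   ipu_outfeed_queue, loops, scopes, utils)

tf.disable_v2_behavior()  # only needed on a TF2-based build

# Placeholder input pipeline; the real script feeds preprocessed images.
batches_per_step = 100
dataset = tf.data.Dataset.from_tensors(tf.zeros([1, 224, 224, 3])).repeat()

# feed_name is required by older SDK releases and removed in newer ones.
infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, feed_name="infeed")
outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name="outfeed")

def loop():
    # Run body() repeatedly on the device, pulling images from the infeed.
    return loops.repeat(batches_per_step, body, [], infeed_queue)

with scopes.ipu_scope('/device:IPU:0'):
    run_loop = ipu_compiler.compile(loop, inputs=[])

dequeue_outfeed = outfeed_queue.dequeue()

# Configure the IPU system before running (num_ipus as in the script above).
cfg = utils.create_ipu_config()
cfg = utils.auto_select_ipus(cfg, num_ipus)
utils.configure_ipu_system(cfg)

with tf.Session() as sess:
    sess.run(infeed_queue.initializer)
    sess.run(run_loop)
    probabilities = sess.run(dequeue_outfeed)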
def auto_sharding(pa, pb, pc):
    # This context marks the section of the graph to autoshard.
    # In this case we want to autoshard across the whole graph
    # so this context isn't actually required.
    with autoshard.ipu_autoshard():
        o1 = pa + pb
        o2 = pa + pc
        out = o1 + o2
    return out
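Marking the region is only half of the mechanism: the shards are actually assigned by a later call to autoshard.automatic_sharding, as in the first snippet. The sketch below shows one way auto_sharding might be compiled and run; it assumes a TF1-style graph-mode session and the utils.create_ipu_config/auto_select_ipus configuration helpers (newer SDKs use IPUConfig instead), and the placeholder shapes, feed values and two-shard split are illustrative choices rather than part of the original example.

import tensorflow.compat.v1 as tf
from tensorflow.python.ipu import autoshard, ipu_compiler, scopes, utils

tf.disable_v2_behavior()  # only needed on a TF2-based build

pa = tf.placeholder(tf.float32, shape=[2], name="pa")
pb = tf.placeholder(tf.float32, shape=[2], name="pb")
pc = tf.placeholder(tf.float32, shape=[2], name="pc")

def compiled_fn(pa, pb, pc):
    out = auto_sharding(pa, pb, pc)
    # Assign the marked ops to shards; the arguments mirror the first snippet.
    autoshard.automatic_sharding(num_shards=2, input_ts=pa, loss_ts=out)
    return out

with scopes.ipu_scope('/device:IPU:0'):
    result = ipu_compiler.compile(compiled_fn, inputs=[pa, pb, pc])

# Give the autosharder two IPUs to shard across.
cfg = utils.create_ipu_config()
cfg = utils.auto_select_ipus(cfg, 2)
utils.configure_ipu_system(cfg)

with tf.Session() as sess:
    print(sess.run(result, feed_dict={pa: [1., 1.],
                                      pb: [0., 1.],
                                      pc: [1., 5.]}))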