def test_pad_sum5(op_tester):
    """Three 1-D inputs, each padded to length 10 at different offsets,
    are summed; the PadSum pattern should handle the padded Sum."""
    inputs = [np.random.rand(2).astype(np.float32) for _ in range(3)]
    # (left, right) zero-padding for each input; all pad up to length 10.
    paddings = [(2, 6), (4, 4), (6, 2)]

    def init_builder(builder):
        tensor_ids = [builder.addInputTensor(d) for d in inputs]
        padded = [
            builder.aiOnnx.pad([tid], [lo, hi], 'constant', 0)
            for tid, (lo, hi) in zip(tensor_ids, paddings)
        ]
        out = builder.aiOnnx.sum(padded)
        builder.addOutputTensor(out)
        return [out]

    def reference(ref_data):
        total = 0
        for data, pad in zip(inputs, paddings):
            total = total + np.pad(data, [pad], 'constant')
        return [total]

    op_tester.setPatterns(['PadSum'], enableRuntimeAsserts=False)
    op_tester.run(init_builder, reference, 'infer')
def test_careful_inplacing(op_tester):
    """Check that an Op, in a subgraph, with an input which is a graph
    output, is not inplaced.

    The subgraph chains six scale-by-2 ops from the input [1, 1] and
    exposes two outputs: the result after three scales (value [8, 8])
    and after six scales (value [64, 64]). The fourth scale consumes a
    tensor that is also a subgraph output, so it must not be inplaced
    (it would clobber the [8, 8] output). The caller adds the two call
    outputs, so the final output is [8+64, 8+64] = [72, 72].
    """
    d0 = np.asarray([1., 1.]).astype(np.float32)

    def get_init_builder():
        def init_builder(builder):
            i0 = builder.addInputTensor(d0)
            subgraph_builder = builder.createSubgraphBuilder()
            subgraph_builder.addInputTensorFromParentGraph(i0)
            i1 = subgraph_builder.aiGraphcore.scale([i0], 2.0, "hoop1")
            i2 = subgraph_builder.aiGraphcore.scale([i1], 2.0, "hoop2")
            i3 = subgraph_builder.aiGraphcore.scale([i2], 2.0, "hoop3")
            i4 = subgraph_builder.aiGraphcore.scale([i3], 2.0, "hoop4")
            i5 = subgraph_builder.aiGraphcore.scale([i4], 2.0, "hoop5")
            # Fixed copy-paste: this debug name was "hoop5", duplicating
            # the previous op's name.
            i6 = subgraph_builder.aiGraphcore.scale([i5], 2.0, "hoop6")
            subgraph_builder.addOutputTensor(i3)  # 8
            subgraph_builder.addOutputTensor(i6)  # 64
            outs = builder.aiGraphcore.call([i0], 2, subgraph_builder)
            summation = builder.aiOnnx.add([outs[0], outs[1]])
            builder.addOutputTensor(summation)
            return [summation]

        return init_builder

    def reference(ref_data):
        # [1,1] scaled by 2 three times -> [8,8]; six times -> [64,64].
        return [np.array([72., 72.]).astype(np.float32)]

    op_tester.setPatterns(['InPlace'], enableRuntimeAsserts=False)
    op_tester.run(get_init_builder(), reference, 'infer')
def test_pad_sum6(op_tester):
    """Four inputs of shape (2, w, 2) are zero-padded along axis 1 to a
    common size of 20 and summed. The output tensor will be (2, 20, 2).

    The intervals along axis=1 that the inputs occupy are:
        d1 -> [17, 18)
        d2 -> [8, 10)
        d3 -> [10, 13)
        d4 -> [3, 7)
    Looks like this:  ...[..].[][.]....|..
    """
    # (start, width) for each input along axis 1.
    specs = [(17, 1), (8, 2), (10, 3), (3, 4)]
    datas = [
        np.random.rand(2, width, 2).astype(np.float32)
        for _, width in specs
    ]

    def init_builder(builder):
        tensor_ids = [builder.addInputTensor(d) for d in datas]
        padded = [
            builder.aiOnnx.pad([tid],
                               [0, start, 0, 0, 20 - start - width, 0],
                               'constant', 0)
            for tid, (start, width) in zip(tensor_ids, specs)
        ]
        out = builder.aiOnnx.sum(padded)
        builder.addOutputTensor(out)
        return [out]

    def reference(ref_data):
        total = 0
        for data, (start, width) in zip(datas, specs):
            total = total + np.pad(
                data, [(0, 0), (start, 20 - start - width), (0, 0)],
                'constant')
        return [total]

    op_tester.setPatterns(['PadSum'], enableRuntimeAsserts=False)
    op_tester.run(init_builder, reference, 'infer')
def test_pad_sum7(op_tester):
    """Three overlapping slices of a (2, 4) input are concatenated and
    unsqueezed; trains and checks the input gradient against torch.
    NOTE(review): the second slice uses int32 start/end/axes constants
    while the others use int64 — presumably deliberate dtype coverage.
    """
    d1 = np.random.rand(2, 4).astype(np.float32)

    def init_builder(builder):
        inp = builder.addInputTensor(d1)

        def one_slice(start, end, dtype):
            # Constants are created in the same order as the original
            # test: axes, starts, ends.
            axes = builder.aiOnnx.constant(np.array([0]).astype(dtype))
            starts = builder.aiOnnx.constant(np.array([start]).astype(dtype))
            ends = builder.aiOnnx.constant(np.array([end]).astype(dtype))
            return builder.aiOnnx.slice([inp, starts, ends, axes])

        s1 = one_slice(0, 1, np.int64)
        s2 = one_slice(1, 2, np.int32)
        s3 = one_slice(1, 3, np.int64)
        joined = builder.aiOnnx.concat([s1, s2, s3], 0)
        out = builder.aiOnnx.unsqueeze([joined], axes=[0])
        builder.addOutputTensor(out)
        return [
            out,
            popart.reservedGradientPrefix() + inp,
            popart.reservedGradientPrefix() + out
        ]

    def reference(ref_data):
        t = torch.tensor(d1, requires_grad=True)
        joined = torch.cat((t[0:1], t[1:2], t[1:3]), 0)
        out = torch.unsqueeze(joined, 0)
        grad_out = ref_data.getOutputTensorGrad(0)
        out.backward(torch.tensor(grad_out))
        return [out, t.grad, None]

    op_tester.setPatterns(['PadSum', 'OpToReshape'],
                          enableRuntimeAsserts=False)
    op_tester.run(init_builder, reference, 'train')
def test_pad_sum1(op_tester):
    """Two (2, 2, 2) inputs padded on opposite sides of axis 1 are
    summed; exercises the PadSum pattern in inference."""
    lhs = np.random.rand(2, 2, 2).astype(np.float32)
    rhs = np.random.rand(2, 2, 2).astype(np.float32)

    def init_builder(builder):
        t_lhs = builder.addInputTensor(lhs)
        t_rhs = builder.addInputTensor(rhs)
        # Pad lhs with 2 leading zeros and rhs with 2 trailing zeros
        # along axis 1.
        p_lhs = builder.aiOnnx.pad([t_lhs], [0, 2, 0, 0, 0, 0], 'constant', 0)
        p_rhs = builder.aiOnnx.pad([t_rhs], [0, 0, 0, 0, 2, 0], 'constant', 0)
        out = builder.aiOnnx.sum([p_lhs, p_rhs])
        builder.addOutputTensor(out)
        return [out]

    def reference(ref_data):
        p_lhs = np.pad(lhs, [(0, 0), (2, 0), (0, 0)], 'constant')
        p_rhs = np.pad(rhs, [(0, 0), (0, 2), (0, 0)], 'constant')
        return [p_lhs + p_rhs]

    op_tester.setPatterns(['PadSum'], enableRuntimeAsserts=False)
    op_tester.run(init_builder, reference, 'infer')
def test_dont_inplace_when_aliased_inputs(op_tester):
    """Concat of a tensor with itself, sliced, then added back onto the
    original tensor: the InPlace pattern must not inplace the concat."""
    data = np.random.randn(5, 3, 16, 16).astype(np.float32)

    def init_builder(builder):
        inp = builder.addInputTensor(data)
        # Inplacing of Concat results in a race condition in an already
        # inplace Add (AddLhsInPlace).
        doubled = builder.aiOnnx.concat([inp, inp], 0)
        # starts=4, ends=9, axes=0 as int32 constants.
        bounds = [
            builder.aiOnnx.constant(np.array([v]).astype(np.int32))
            for v in (4, 9, 0)
        ]
        window = builder.aiOnnx.slice([doubled] + bounds)
        return [builder.aiOnnx.add([inp, window])]

    def reference(ref_data):
        window = np.concatenate([data, data], axis=0)[4:9]
        return [data + window]

    op_tester.setPatterns(['InPlace'], enableRuntimeAsserts=False)
    op_tester.run(init_builder, reference, 'infer')
def test_weight_update_replicated(op_tester):
    """Train a GEMM across replicated graphs and compare the updated
    weights and gradients against a pytorch reference that accumulates
    gradients over `replicationFactor` passes before one SGD step.

    Besides the output and weights, the expected-tensor list includes
    the optimizer state tensors (scaled learning rate, weight decay
    scale factor), one value per replica.
    """
    A = np.random.rand(2, 4).astype(np.float32)
    B = np.ones((4, 6)).astype(np.float32)
    C = np.random.rand(2, 6).astype(np.float32)
    alpha = np.random.random(1).astype(np.float32)[0]
    beta = np.random.random(1).astype(np.float32)[0]
    transA = False
    transB = False

    def init_builder(builder):
        i1 = builder.addInputTensor(A)
        i2 = builder.addInitializedInputTensor(B)
        i3 = builder.addInitializedInputTensor(C)
        o = builder.aiOnnx.gemm([i1, i2, i3], alpha, beta, transA, transB)
        builder.addOutputTensor(o)
        return [
            o,
            popart.reservedGradientPrefix() + i2, i2,
            popart.reservedGradientPrefix() + i3, i3,
            "scaledLearningRate0___default___FLOAT",
            "weightDecayScaleFactor0___default___FLOAT"
        ]

    def reference(ref_data):
        class Module(torch.nn.Module):
            def __init__(self):
                super(Module, self).__init__()
                self.b = torch.tensor(B, requires_grad=True)
                self.c = torch.tensor(C, requires_grad=True)

                # Create the weight tensors for pytorch
                self.B = torch.nn.Parameter(self.b, requires_grad=True)
                self.C = torch.nn.Parameter(self.c, requires_grad=True)

                self.matmul = torch.matmul

            def forward(self, inputs):
                # Perform the GEMM operation
                x = alpha * self.matmul(inputs[0], self.B) + beta * self.C
                return x

        module = Module()
        module.train()

        optimizer = torch.optim.SGD(module.parameters(),
                                    lr=0.01,
                                    weight_decay=0.0,
                                    momentum=0.0)

        a = torch.tensor(A, requires_grad=True)
        optimizer.zero_grad()

        # Loss function is loop-invariant; construct it once rather
        # than on every pass.
        loss = torch.nn.L1Loss(reduction="sum")

        outputs = ()

        # graph with gradient accumulation i.e. only update the weights
        # after x passes
        for n in range(replicationFactor):
            # adding n as offset, as op_tester expects
            o = module([a + n])
            outputs = outputs + (o, )
            target = torch.zeros(o.size())
            output = loss(o, target)
            output.backward()

        # Update the weights
        optimizer.step()

        # Add dimension to each output so we can concatenate them
        outputs = tuple(map(lambda x: torch.unsqueeze(x, 0), outputs))

        return [
            torch.cat(outputs), module.b.grad, module.B.data,
            module.c.grad, module.C.data,
            np.array([0.01, 0.01, 0.01, 0.01], np.float32),
            np.array([1, 1, 1, 1], np.float32)
        ]

    op_tester.lossReduction = popart.ReductionType.Sum
    op_tester.setPatterns(
        ['GemmDecomposition', 'PreUniRepl', 'MatMulRhsGradOp', 'OpToReshape'],
        enableRuntimeAsserts=False)
    op_tester.options.enableReplicatedGraphs = True
    op_tester.options.replicatedGraphCount = replicationFactor
    op_tester.device = tu.create_test_device(numIpus=replicationFactor)
    if not op_tester.device:
        raise RuntimeError(
            "Failed to acquire IPU device in training graph replication test")
    op_tester.numIPUs = replicationFactor
    op_tester.run(init_builder,
                  reference,
                  'train',
                  optimizer=popart.SGD({"defaultLearningRate": (0.01, False)}))
def test_replication_infer(op_tester):
    """Inference with graph replication: a GEMM run on each replica with
    a distinct input offset, compared against four offset pytorch runs."""
    # 2 samples per device
    A = np.random.rand(2, 7).astype(np.float32)
    B = np.random.rand(7, 6).astype(np.float32)
    C = np.random.rand(1, 6).astype(np.float32)

    alpha = 1.0
    beta = 1.0
    transA = False
    transB = False

    def init_builder(builder):
        i1 = builder.addInputTensor(A)
        i2 = builder.addInitializedInputTensor(B)
        i3 = builder.addInitializedInputTensor(C)
        out = builder.aiOnnx.gemm([i1, i2, i3], alpha, beta, transA, transB)
        builder.addOutputTensor(out)
        return [out]

    def reference(ref_data):
        class Module(torch.nn.Module):
            def __init__(self):
                super(Module, self).__init__()
                self.B = torch.nn.Parameter(torch.tensor(B))
                self.C = torch.nn.Parameter(torch.tensor(C))
                self.matmul = torch.matmul

            def forward(self, inputs):
                return 1.0 * self.matmul(inputs[0], self.B) + 1.0 * self.C

        module = Module()
        module.eval()

        a = torch.tensor(A)

        # forward
        # Run the pytorch module multiple times to simulate the same
        # behaviour as popart. The offsets (with corresponding offsets
        # in op_tester) ensure that the samples are distinct between
        # replicas.
        per_replica = [module([a + float(k)]) for k in range(4)]
        expanded = tuple(torch.unsqueeze(r, 0) for r in per_replica)
        return [torch.cat(expanded)]

    op_tester.setPatterns(['GemmDecomposition', 'PreUniRepl', 'OpToReshape'],
                          enableRuntimeAsserts=False)
    op_tester.options.enableReplicatedGraphs = True
    op_tester.options.replicatedGraphCount = replicationFactor
    op_tester.device = tu.create_test_device(replicationFactor)
    if not op_tester.device:
        raise RuntimeError(
            "Failed to acquire IPU device in inference graph replication test")
    op_tester.numIPUs = replicationFactor
    op_tester.run(init_builder, reference, 'infer')
def test_weight_update(op_tester):
    """Single-IPU GEMM training step: checks the weight gradient and the
    post-step weight values against a pytorch SGD reference, plus the
    scalar optimizer state tensors."""
    A = np.ones((2, 4)).astype(np.float32)
    B = np.ones((4, 6)).astype(np.float32)
    C = np.zeros((2, 6)).astype(np.float32)

    alpha = 1.0
    beta = 1.0
    transA = False
    transB = False

    def init_builder(builder):
        i1 = builder.addInputTensor(A)
        i2 = builder.addInitializedInputTensor(B)
        i3 = builder.addInitializedInputTensor(C)
        out = builder.aiOnnx.gemm([i1, i2, i3], alpha, beta, transA, transB)
        builder.addOutputTensor(out)
        return [
            out,
            popart.reservedGradientPrefix() + i2, i2, i3,
            "scaledLearningRate0___default___FLOAT",
            "weightDecayScaleFactor0___default___FLOAT"
        ]

    def reference(ref_data):
        class Module(torch.nn.Module):
            def __init__(self):
                super(Module, self).__init__()
                self.B = torch.nn.Parameter(torch.tensor(B),
                                            requires_grad=True)
                self.C = torch.nn.Parameter(torch.tensor(C),
                                            requires_grad=True)
                self.matmul = torch.matmul

            def forward(self, inputs):
                return 1.0 * self.matmul(inputs[0], self.B) + 1.0 * self.C

        module = Module()
        a = torch.tensor(A, requires_grad=True)

        # forward
        out = module([a])

        return [
            out, module.B.grad, module.B.data, module.C.data,
            np.float32(0.01),
            np.float32(1.0)
        ]

    op_tester.device = tu.create_test_device()
    op_tester.numIPUs = 1
    op_tester.setPatterns(
        ['GemmDecomposition', 'PreUniRepl', 'MatMulRhsGradOp', 'OpToReshape'],
        enableRuntimeAsserts=False)
    op_tester.run(init_builder,
                  reference,
                  'train',
                  optimizer=popart.SGD({"defaultLearningRate": (0.01, False)}))