preProbSquared = x * x l1loss = torch.sum(0.01 * torch.abs(preProbSquared)) window_size = (int(x.size()[2]), int(x.size()[3])) x = torch.nn.functional.avg_pool2d(x, kernel_size=window_size) x = torch.squeeze(x) probs = self.softmax(x) logprobs = torch.log(probs) nll = nllloss(logprobs, labels) loss = l1loss + nll return loss # Set arbitrary seed so model weights are initialized to the # same values each time the test is run torch.manual_seed(1) torchWriter = torchwriter.PytorchNetWriter( inNames=inNames, outNames=outNames, optimizer=popart.ConstSGD(0.001), inputShapeInfo=inputShapeInfo, dataFlow=dataFlow, ### Torch specific: module=Module0(), samplesPerBatch=batchSize) c10driver.run(torchWriter, willowOptPatterns, args.outputdir, cifarInIndices, args.device, args.hw_id)
x = torch.nn.functional.avg_pool2d(x, kernel_size=window_size, stride=window_size) preprobs = torch.squeeze(x) probs = self.softmax(preprobs) logprobs = torch.log(probs) loss = nllloss(logprobs, inputs[1]) return loss # Set arbitrary seed so model weights are initialized to the # same values each time the test is run torch.manual_seed(1) torchWriter = torchwriter.PytorchNetWriter( inNames=inNames, outNames=outNames, # large weight_decay term to test that it is definitely working optimizer=popart.SGD({ "defaultLearningRate": (0.001, False), "defaultWeightDecay": (10.0, False) }), inputShapeInfo=inputShapeInfo, dataFlow=dataFlow, ### Torch specific: module=Module0(), samplesPerBatch=batchSize) c10driver.run(torchWriter, None, args.outputdir, cifarInIndices, args.device, args.hw_id)
def forward(self, inputs):
    """Return relu(conv1(inputs[0]))."""
    convolved = self.conv1(inputs[0])
    return self.relu(convolved)


# Set arbitrary seed so model weights are initialized to the
# same values each time the test is run
torch.manual_seed(1)

torchWriter = torchwriter.PytorchNetWriter(
    inNames=inNames,
    outNames=outNames,
    optimizer=optimizer,
    inputShapeInfo=inputShapeInfo,
    dataFlow=dataFlow,
    ### Torch specific:
    module=Module0(),
    samplesPerBatch=samplesPerBatch,
)

# Passes if torch and popart models match
c10driver.run(
    torchWriter=torchWriter,
    patterns=None,
    outputdir=args.outputdir,
    cifarInIndices=cifarInIndices,
    device=args.device,
    device_hw_id=args.hw_id,
    mode="infer",
)
x = self.sin(image0) x = self.conv1(x) x = self.sin(x) x = self.conv2(x) window_size = (int(x.size()[2]), int(x.size()[3])) x = torch.nn.functional.avg_pool2d(x, kernel_size=window_size) x = torch.squeeze(x) # This is the where the GEMM happens: x = self.linear(x) out = torch.sum(0.1 * torch.abs(x)) return out # Set arbitrary seed so model weights are initialized to the # same values each time the test is run torch.manual_seed(1) torchWriter = torchwriter.PytorchNetWriter( inNames=inNames, outNames=outNames, # large weight_decay term to test that it is definitely working optimizer=popart.ConstSGD(learning_rate=0.001, weight_decay=10), inputShapeInfo=inputShapeInfo, dataFlow=dataFlow, ### Torch specific: module=Module0(), samplesPerBatch=batchSize) c10driver.run(torchWriter, None, args.outputdir, cifarInIndices, args.device, args.hw_id)
] class Module0(torch.nn.Module): def __init__(self): torch.nn.Module.__init__(self) self.a = torch.nn.Parameter(torch.rand(2, 1, 4, 5, 6)) self.b = torch.nn.Parameter(torch.rand(3, 1, 6, 7)) self.matmul = torch.matmul def forward(self, inputs): return self.matmul(self.a, self.b) # Set arbitrary seed so model weights are initialized to the # same values each time the test is run torch.manual_seed(1) torchWriter = torchwriter.PytorchNetWriter( inNames=inNames, outNames=outNames, losses=losses, optimizer=popart_core.ConstSGD(0.001), inputShapeInfo=inputShapeInfo, dataFlow=dataFlow, ### Torch specific: module=Module0()) c10driver.run(torchWriter, willowOptPasses, args.outputdir, cifarInIndices, args.device, args.hw_id)
return preProbSquared, probs # Set arbitrary seed so model weights are initialized to the # same values each time the test is run torch.manual_seed(1) # Test with ConstSGD anchors_1 = c10driver.run( torchwriter.PytorchNetWriter( inNames=inNames, outNames=outNames, losses=losses, # default loss scaling (1.0f) optimizer=popart.SGD({"defaultLearningRate": (0.001, True)}), inputShapeInfo=inputShapeInfo, dataFlow=dataFlow, ### Torch specific: module=Module0(), samplesPerBatch=batchSize), willowOptPasses, args.outputdir, cifarInIndices, args.device, args.hw_id) # Test with ConstSGD anchors_2 = c10driver.run( torchwriter.PytorchNetWriter( inNames=inNames,