Example 1
def test_weight_overwrite_trained_weight():
    torch.manual_seed(42)
    model = torch.nn.Linear(10, 10)

    poptorch_model = helpers.trainingModelWithLoss(model,
                                                   loss=torch.nn.MSELoss())
    target = torch.randn(10)
    input = torch.randn(10)

    # Make sure the first run doesn't already pass the test.
    original, loss = poptorch_model(input, target)
    assert not torch.allclose(original, target, rtol=1e-02, atol=1e-02)

    # Train on IPU.
    for _ in range(0, 2500):
        trained_out, trained_loss = poptorch_model(input, target)

    # Check we have trained the "model"
    assert torch.allclose(trained_out, target, rtol=1e-02, atol=1e-02)

    # Overwrite the trained weights with weights from host.
    poptorch_model.copyWeightsToDevice()

    # Don't train them.
    poptorch_model.setOptimizer(optim.SGD(model.parameters(), lr=0.0))

    out, loss = poptorch_model(input, target)
    host_out = model(input)

    # Check we are no longer trained.
    assert not torch.allclose(out, target, rtol=1e-02, atol=1e-02)
    assert not torch.allclose(loss, trained_loss)

    assert torch.allclose(host_out, out)
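
All of these examples rely on the helpers.trainingModelWithLoss test utility, which wraps a model and a loss so that the compiled training model returns an (output, loss) pair. For orientation only, a minimal sketch of such a wrapper, assuming the public poptorch.trainingModel and poptorch.identity_loss APIs rather than the actual helper used in these tests, might look like this:

import torch
import poptorch


class _ModelWithLoss(torch.nn.Module):
    # Hypothetical wrapper: combines a user model and a loss function so that
    # the compiled training model returns (output, loss), as the tests expect.
    def __init__(self, model, loss):
        super().__init__()
        self.model = model
        self.loss = loss

    def forward(self, data, target):
        out = self.model(data)
        # identity_loss marks the tensor PopTorch should backpropagate from.
        return out, poptorch.identity_loss(self.loss(out, target),
                                           reduction="none")


def trainingModelWithLoss(model, loss, optimizer=None, options=None):
    # Sketch only; the real test helper may accept extra arguments or differ.
    return poptorch.trainingModel(_ModelWithLoss(model, loss),
                                  options=options,
                                  optimizer=optimizer)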
Example 2
def test_trainingBatching():
    torch.manual_seed(4424242)

    # 10 batches of 10.
    input = torch.randn(10, 10)

    # 10 batches of 1.
    label = torch.randint(0, 10, [1])
    label = label.expand([10])
    model = torch.nn.Linear(10, 10)

    # Run on the IPU: batch size 1, 10 popart batches (device iterations).
    opts = poptorch.Options().deviceIterations(10)
    poptorch_model = helpers.trainingModelWithLoss(
        model, options=opts, loss=torch.nn.CrossEntropyLoss())

    # Run all 10 batches natively on the host as batch size 10.
    out = model(input)

    # Sanity check we weren't already matching the label.
    assert not torch.equal(torch.argmax(out, dim=1), label)

    for _ in range(0, 1000):
        _, loss = poptorch_model(input, label)

        # Each batch should NOT report its own loss, since by default the training model uses a "Final" anchor.
        assert len(loss.size()) == 0

    # Run with trained weights.
    out = model(input)

    # Check we are now equal with labels.
    assert torch.equal(torch.argmax(out, dim=1), label)
Example 3
def test_bigger_model_training():
    torch.manual_seed(42)

    model = torch.nn.Sequential(torch.nn.Linear(10,
                                                10), torch.nn.Linear(10, 10),
                                torch.nn.Linear(10,
                                                10), torch.nn.Linear(10, 10),
                                torch.nn.Linear(10, 10))

    poptorch_model = helpers.trainingModelWithLoss(model,
                                                   loss=torch.nn.MSELoss())

    target = torch.randn(10)
    input = torch.randn(10).half()

    # Make sure the first run doesn't already pass the test.
    original, original_loss = poptorch_model(input, target.half())
    assert original_loss > 0.1
    assert not torch.allclose(original.float(), target, rtol=1e-02, atol=1e-02)

    for _ in range(0, 2500):
        out, loss = poptorch_model(input, target.half())

    # Check we have trained the "model"
    assert loss.float() < 0.001
    assert torch.allclose(out.float(), target, rtol=1e-02, atol=1e-02)
Example 4
def test_LogSoftmax():
    torch.manual_seed(42)

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.linear = torch.nn.Linear(10, 10)
            self.softmax = torch.nn.LogSoftmax(dim=1)

        def forward(self, x):
            x = self.linear(x)
            return self.softmax(x)

    model = Net()

    poptorch_model = helpers.trainingModelWithLoss(
        model, loss=torch.nn.NLLLoss(reduction="mean"))

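    # For several random inputs, check the IPU output matches the host (CPU) result.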
    for _ in range(0, 10):
        label = torch.randint(0, 10, [1])
        input = torch.randn(1, 10)

        # Run on host.
        groundTruth = model(input)
        poptorch_out, _ = poptorch_model(input, label)

        assert torch.allclose(groundTruth, poptorch_out)
Example 5
def test_sgd_IR_accum_type(opt, accType):
    torch.manual_seed(42)
    model = torch.nn.Linear(10, 10).half()

    # "Train" with learning rate of zero and check the loss remains the same.
    optimizer = opt(model.parameters(), lr=0.01, accumType=accType)
    # These two should also be tested but they don't appear to work in popart yet.
    #firstOrderMomentumAccumType=torch.float16,
    #secondOrderMomentumAccumType=torch.float16 )
    poptorch_model = helpers.trainingModelWithLoss(
        model, loss=torch.nn.CrossEntropyLoss().half(), optimizer=optimizer)

    input = torch.randn(1, 10).half()
    label = torch.randint(0, 10, [1])

    poptorch_model(input, label)

    as_json = json.load(StringIO(poptorch_model._debugGetPopartIR()))  # pylint: disable=protected-access

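    # Count the Cast ops in the PopART main graph; a float16 accumulation type is expected to introduce them.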
    numCastsFound = sum([op["type"] == "Cast" for op in as_json["maingraph"]])

    if accType == torch.float16:
        assert numCastsFound == 2
    else:
        assert numCastsFound == 0
Example 6
def test_NLLLoss2d_training(reduction):

    torch.manual_seed(42)
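    # N: batch size, C: number of classes, M: spatial size of the 2D target.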
    N, C, M = 3, 2, 5

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.linear = torch.nn.Linear(M * M, M * M * C)
            self.softmax = torch.nn.LogSoftmax(dim=1)

        def forward(self, x):
            x = x.reshape(N, M * M)
            x = self.linear(x).reshape(N, C, M, M)
            return self.softmax(x)

    model = Net()

    poptorch_model = helpers.trainingModelWithLoss(
        model, loss=torch.nn.NLLLoss(reduction=reduction))
    x = torch.randn(N, M, M)
    y = torch.empty(N, M, M, dtype=torch.long).random_(0, C)

    _, original_loss = poptorch_model(x, y)

    for _ in range(0, 1000):
        out, loss = poptorch_model(x, y)

    # Check we have trained the "model"
    assert loss < original_loss
    torch.testing.assert_allclose(torch.argmax(out, dim=1), y)
Example 7
def test_NLLLoss_training(reduction):

    torch.manual_seed(42)

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.linear = torch.nn.Linear(10, 10)
            self.softmax = torch.nn.LogSoftmax(dim=1)

        def forward(self, x):
            x = self.linear(x)
            return self.softmax(x)

    model = Net()

    poptorch_model = helpers.trainingModelWithLoss(
        model, loss=torch.nn.NLLLoss(reduction=reduction))
    input = torch.randn(1, 10)
    label = torch.randint(0, 10, [1])

    # Make sure the first run doesn't already pass the test.
    _, original_loss = poptorch_model(input, label)

    for _ in range(0, 1000):
        out, loss = poptorch_model(input, label)

    # Check we have trained the "model"
    assert loss < original_loss
    assert torch.argmax(out, dim=1) == label
Example 8
def test_BCE_training(reduction):
    torch.manual_seed(42)

    model = torch.nn.Sequential(torch.nn.Linear(10, 10), torch.nn.Sigmoid())

    poptorch_model = helpers.trainingModelWithLoss(
        model,
        loss=torch.nn.BCELoss(reduction=reduction),
        optimizer=optim.SGD(model.parameters(), lr=0.1))

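    # BCE targets are probabilities, drawn uniformly from [0, 1).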
    target = torch.empty(10).uniform_()
    input = torch.randn(10)

    # Make sure the first run doesn't already pass the test.
    _, original_loss = poptorch_model(input, target)

    for _ in range(0, 1000):
        out, loss = poptorch_model(input, target)

    print(out)
    print(target)
    print(loss)
    print("\n")

    # Check we have trained the "model"
    assert loss < original_loss
    torch.testing.assert_allclose(target, out, rtol=1e-03, atol=1e-03)
Example 9
def test_lstm_fc_training():
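    # Smoke test: an LSTM followed by a fully connected layer should compile and run a training step.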
    class LSTMModel(nn.Module):
        def __init__(self, input_size, hidden_size, classes):
            super(LSTMModel, self).__init__()
            self.lstm = nn.LSTM(input_size,
                                hidden_size,
                                num_layers=1,
                                bias=True)
            self.fc = nn.Linear(hidden_size, classes, bias=False)

        def forward(self, x):
            h1, _ = self.lstm(x)
            h2 = h1[-1, :, :]
            h3 = self.fc(h2)
            return h3

    torch.manual_seed(42)
    batch_size = 2
    input_size = 5
    classes = 3
    lstm = LSTMModel(input_size=input_size, hidden_size=3, classes=classes)

    ipuLstm = helpers.trainingModelWithLoss(lstm, loss=nn.CrossEntropyLoss())
    input = torch.randn(1, batch_size, input_size)
    label = torch.tensor([1, 2])
    ipuLstm(input, label.long())
Example 10
def test_access_parameters(use_half):
    torch.manual_seed(42)

    # 10 batches of 10.
    input = torch.randn(10, 10)

    # 10 batches of 1.
    label = torch.randint(0, 10, [1])
    label = label.expand([10])

    class Model(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(10, 10)

        def forward(self, x):
            return self.linear(x)

    model = Model()

    if use_half:
        model.half()
        input = input.half()

    # Run on the IPU: batch size 1, 10 popart batches (device iterations).
    opts = poptorch.Options().deviceIterations(10)
    poptorch_model = helpers.trainingModelWithLoss(
        model, options=opts, loss=torch.nn.CrossEntropyLoss())

    original_weights = str(model.linear.weight)
    inference = poptorch.inferenceModel(model)

    # Run all 10 batches as batch size 10 through the inference model.
    out = inference(input)

    assert original_weights == str(model.linear.weight)

    # Sanity check we weren't already matching the label.
    assert not torch.equal(torch.argmax(out.int(), dim=1), label)

    for _ in range(0, 1000):
        _, loss = poptorch_model(input, label)

        # Each batch should NOT report its own loss, since by default the training model uses a "Final" anchor.
        assert len(loss.size()) == 0

    assert original_weights != str(poptorch_model.model.linear.weight)

    # Run with trained weights.
    out = inference(input)

    # Check we are now equal with labels.
    assert torch.equal(torch.argmax(out.int(), dim=1), label)
Example 11
def test_resnet():
    torch.manual_seed(42)

    image_input = torch.randn([1, 3, 224, 224]).half()
    t1 = torch.tensor([1.]).long()
    # We are running on a dummy input so it doesn't matter if the weights are trained.
    model = models.resnet18(pretrained=False)
    model.train()
    model.half()

    training_model = helpers.trainingModelWithLoss(model,
                                                   loss=torch.nn.NLLLoss())

    # Run on IPU.
    poptorch_out, loss = training_model(image_input, t1)

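    # A half-precision model should produce half-precision outputs and loss.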
    assert poptorch_out.dtype == torch.half
    assert loss.dtype == torch.half
Example 12
def dropout_training_harness(dropout_op, input, check_func):
    # Create a model consisting of a single dropout operation
    # with a dummy parameter for the optimizer
    model = dropout_op
    model.register_parameter('param', torch.nn.Parameter(torch.empty(10)))
    torch.manual_seed(0)
    native_out = model(input)

    # Create a poptorch training model with a fixed random seed for deterministic runs
    # Note that the loss is irrelevant and ignored.
    opts = poptorch.Options().randomSeed(8)
    poptorch_model = helpers.trainingModelWithLoss(model,
                                                   loss=torch.nn.L1Loss(),
                                                   options=opts)
    dummy_label = torch.zeros_like(input)
    poptorch_out, _ = poptorch_model(input, dummy_label)
    assert native_out.size() == poptorch_out.size()
    check_func(poptorch_out)
Example 13
def test_L1Loss_training():
    torch.manual_seed(42)

    reductions = ["mean", "sum"]

    for reduction in reductions:
        torch.manual_seed(42)

        model = torch.nn.Linear(10, 10)

        poptorch_model = helpers.trainingModelWithLoss(
            model,
            loss=torch.nn.L1Loss(reduction=reduction),
            optimizer=optim.SGD(model.parameters(), lr=0.01))

        target = torch.randn(10)
        input = torch.randn(10)

        # Make sure the first run doesn't already pass the test.
        original, original_loss = poptorch_model(input, target)
        assert original_loss > 0.1
        assert not torch.allclose(original, target, rtol=1e-02, atol=1e-02)

        for i in range(0, 2000):
            out, loss = poptorch_model(input, target)

            # The learning rate needs to be lowered part-way through for the model to converge.
            if i == 1000:
                poptorch_model.setOptimizer(
                    optim.SGD(model.parameters(), lr=0.001))

        # Check we have trained the "model"
        assert loss < original_loss

        # "sum" L1 losses tend to be very large compared to "mean"
        if reduction == "sum":
            assert loss < 0.1
        else:
            assert loss < 0.001

        assert torch.allclose(out, target, rtol=1e-02, atol=1e-02)
Example 14
def test_CrossEntropyLoss_training(reduction):
    torch.manual_seed(42)

    model = torch.nn.Linear(10, 10)

    poptorch_model = helpers.trainingModelWithLoss(
        model, loss=torch.nn.CrossEntropyLoss(reduction=reduction))
    input = torch.randn(1, 10)
    label = torch.randint(0, 10, [1])

    # Make sure the first run doesn't already pass the test.
    _, original_loss = poptorch_model(input, label)

    for _ in range(0, 1000):
        out, loss = poptorch_model(input, label)

    # Check we have trained the "model"
    assert loss < original_loss
    assert torch.argmax(out, dim=1) == label
Example 15
def test_instanceNorm(instanceNormXd):
    torch.manual_seed(42)

    d = instanceNormXd[1]

    class Model(nn.Module):
        def __init__(self):
            super(Model, self).__init__()
            self.norm = instanceNormXd[0](6, affine=True)
            self.fc1 = nn.Linear(6 * 2**d, 10)

        def forward(self, x):
            x = self.norm(x)
            x = x.flatten(1)
            return self.fc1(x)

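    # Repeat the whole training run a few times, each time with a freshly initialised model.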
    for _ in range(3):
        model = Model()
        opt = optim.AdamW(model.parameters(), lr=0.01)

        poptorch_model = helpers.trainingModelWithLoss(
            model, loss=nn.CrossEntropyLoss(), optimizer=opt)

        shape = [5, 6]
        shape.extend([2 for i in range(d)])

        # Offset the data by multiplying by random values and shifting by a random bias
        input = torch.randint(2, 10, shape) * torch.randn(
            shape) + torch.randint(2, 10, [1]) * torch.randn(1)
        label = torch.randint(0, 10, [shape[0]])

        _, original_loss = poptorch_model(input, label)

        for _ in range(0, 100):
            out, loss = poptorch_model(input, label)

        # Check we have trained the model
        assert loss < original_loss
        assert loss < 0.03
        assert torch.equal(torch.argmax(out, dim=1), label)
Example 16
def test_training():
    class TrainingNetwork(nn.Module):
        def __init__(self):
            super().__init__()
            self.ln = torch.nn.Linear(100, 100)
            self.softmax = nn.Softmax(1)

        def forward(self, t):
            x = t[0]
            bias = t[1]
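            # Call the custom "Cube" op registered under the "com.acme" domain; it returns two outputs.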
            x, y = poptorch.custom_op([x, bias],
                                      "Cube",
                                      "com.acme",
                                      1,
                                      example_outputs=[x, x])
            x = self.ln(x)
            return self.softmax(x), y

    model = TrainingNetwork()

    x = torch.rand((1, 100))
    bias = torch.full((1, 100), 2.0)

    y = torch.full([1], 42, dtype=torch.long)

    def custom_loss(model_out, labels):
        l1 = torch.nn.functional.nll_loss(model_out[0], labels)
        # PopART errors if this is unused.
        l2 = torch.sum(model_out[1]) * 0.0001

        return l1 + l2

    training = helpers.trainingModelWithLoss(model, custom_loss)

    for _ in range(0, 100):
        x = torch.rand((1, 100))
        out, _ = training((x, bias), y)

    assert torch.argmax(out[0]) == 42
Example 17
def test_sgd_IR(opt):
    torch.manual_seed(42)
    model = torch.nn.Linear(10, 10)

    # "Train" with learning rate of zero and check the loss remains the same.
    optimizer = opt(model.parameters(), lr=0.01)

    poptorch_model = helpers.trainingModelWithLoss(
        model, loss=torch.nn.CrossEntropyLoss(), optimizer=optimizer)

    input = torch.randn(1, 10)
    label = torch.randint(0, 10, [1])

    poptorch_model(input, label)

    as_json = json.load(StringIO(poptorch_model._debugGetPopartIR()))  # pylint: disable=protected-access

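    # Count the optimizer var-update ops that appear in the PopART main graph.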
    AdamVarUpdate = 0
    AdamUpdater = 0
    SGD0VarUpdate = 0
    for name in as_json:
        assert name == "maingraph"
        for op in as_json[name]:
            if op['type'] == "AdamUpdater":
                AdamUpdater += 1
            elif op['type'] == "AdamVarUpdate":
                AdamVarUpdate += 1
            elif op['type'] == "SGD0VarUpdate":
                SGD0VarUpdate += 1

    if opt in (optim.SGD, poptorch.optim.SGD):
        assert SGD0VarUpdate == 2
        assert AdamVarUpdate == 0 and AdamUpdater == 0
    else:
        assert SGD0VarUpdate == 0
        assert AdamVarUpdate == 2 and AdamUpdater == 2
Example 18
def test_optimizer(opt):
    torch.manual_seed(42)

    model = torch.nn.Linear(10, 10)

    # "Train" with learning rate of zero and check the loss remains the same.
    optimizer = opt(model.parameters(), lr=0.00)

    poptorch_model = helpers.trainingModelWithLoss(
        model, loss=torch.nn.CrossEntropyLoss(), optimizer=optimizer)

    input = torch.randn(1, 10)
    label = torch.randint(0, 10, [1])

    # Make sure the first run doesn't already pass the test.
    _, original_loss = poptorch_model(input, label)

    # Loss shouldn't change.
    for _ in range(0, 50):
        out, loss = poptorch_model(input, label)
        assert loss == original_loss

    # We shouldn't get the right result.
    assert not torch.argmax(out, dim=1) == label

    # Update the optimizer and check the loss now begins to decrease.
    optimizer.param_groups[0]['lr'] = 0.01
    poptorch_model.setOptimizer(optimizer)

    for _ in range(0, 1000):
        out, loss = poptorch_model(input, label)

    # Check we have trained the "model"
    assert loss < original_loss
    assert loss < 0.03
    assert torch.argmax(out, dim=1) == label
Example 19
def test_velocity_scaling_copy():
    torch.manual_seed(42)

    model = torch.nn.Linear(10, 10)

    # "Train" with learning rate of zero and check the loss remains the same.
    optimizer = poptorch.optim.SGD(model.parameters(),
                                   lr=0.01,
                                   velocity_scaling=128)

    poptorch_model = helpers.trainingModelWithLoss(
        model,
        loss=torch.nn.CrossEntropyLoss(reduction="sum"),
        optimizer=optimizer)

    input = torch.randn(1, 10)
    label = torch.randint(0, 10, [1])

    poptorch_model(input, label)

    # Check copy.copy preserves optimizer Poptorch attributes
    o = copy.copy(optimizer)
    poptorch_model.setOptimizer(o)
    poptorch_model(input, label)
Example 20
def test_trainingAnchors(anchor):
    torch.manual_seed(42)

    # 1000 batches of 10.
    input = torch.randn(1000, 10)

    # 1000 batches of 1.
    label = torch.randint(0, 10, [1])
    label = label.expand([1000])

    # The model
    model = torch.nn.Linear(10, 10)

    # Run native PyTorch on the CPU over the full input.
    model(input)

    # Run on the IPU: batch size 1, 1000 popart batches (device iterations).
    opts = poptorch.Options().deviceIterations(1000)
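    # anchor_return_period only applies to AnchorMode.EveryN: every 20th iteration's results are returned.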
    opts.anchorMode(anchor, anchor_return_period=20)
    poptorch_model = helpers.trainingModelWithLoss(
        model, options=opts, loss=torch.nn.CrossEntropyLoss())

    poptorchOut, loss = poptorch_model(input, label)

    if anchor == poptorch.AnchorMode.All:
        # Expect the full batch.
        assert len(poptorchOut.size()) == 2
        assert poptorchOut.size()[0] == 1000

        assert len(loss.size()) == 1
        assert loss.size()[0] == 1000

        # Check the rolling average loss is downward sloped.
        interval = 100
        previous_average = torch.mean(loss[:interval])
        for i in range(1, 1000 // interval):
            start = interval * i
            end = start + interval
            new_average = torch.mean(loss[start:end])

            assert new_average < previous_average

            previous_average = new_average

    elif anchor == poptorch.AnchorMode.EveryN:
        # Otherwise we are expecting device_iterations / N
        assert len(poptorchOut.size()) == 2
        assert poptorchOut.size()[0] == 50

        # There's too much noise in the losses to test them directly without averaging as above, so just check the sizes.
        assert len(loss.size()) == 1
        assert loss.size()[0] == 50
    else:
        # Otherwise we are expecting just one element per batch.
        assert len(poptorchOut.size()) == 2
        assert poptorchOut.size()[0] == 1

        assert len(loss.size()) == 0

        if anchor in [poptorch.AnchorMode.Final, poptorch.AnchorMode.Default]:
            # We just have to check the loss is small.
            # This is just relative to the previously observed loss values on this test with this seed.
            assert loss < 0.2

        elif anchor == poptorch.AnchorMode.Sum:
            # We just have to check that the loss is huge.
            assert loss > 500.0
        else:
            assert False, "Unexpected anchor type %s" % anchor
Example 21
def test_weights_sharing_ipus():
    torch.manual_seed(42)
    model = torch.nn.Linear(10, 10)

    training_model = helpers.trainingModelWithLoss(model,
                                                   loss=torch.nn.MSELoss())
    training_model.deviceToHostCounter = 0
    realMethod = training_model.copyWeightsToHost

    def deviceToHostWrapper(model):
        model.deviceToHostCounter += 1
        realMethod()

    training_model.copyWeightsToHost = types.MethodType(
        deviceToHostWrapper, training_model)

    # Wrap the same model: the training and inference wrappers share weights (in 'model'), which can be copied back once training is finished.
    inference_model = poptorch.inferenceModel(model)
    target = torch.randn(10)
    input = torch.randn(10)

    out_inference = inference_model(input)
    assert not torch.allclose(out_inference, target, rtol=1e-02, atol=1e-02)

    # Make sure the first run doesn't already pass the test.
    original, _ = training_model(input, target)
    assert not torch.allclose(original, target, rtol=1e-02, atol=1e-02)

    # Train on IPU.
    for _ in range(0, 1000):
        out, _ = training_model(input, target)

    assert training_model.deviceToHostCounter == 0, \
            "No implicit copy needed to train the model"

    # Run without copying the weights and check they've been automatically updated.
    out_inference = inference_model(input)
    assert torch.allclose(out_inference, out)
    assert training_model.deviceToHostCounter == 1, \
            "1 implicit copy after having trained the model"
    training_model.deviceToHostCounter = 0  # reset counter

    out_inference = inference_model(input)
    assert torch.allclose(out_inference, out)
    assert training_model.deviceToHostCounter == 0, \
            "No implicit copy needed after inference"

    # Train on IPU.
    for _ in range(0, 1500):
        out, _ = training_model(input, target)

    assert training_model.deviceToHostCounter == 0, \
            "No implicit copy needed to train the model"

    # Run without copying the weights and check they've been automatically updated.
    out_inference = inference_model(input)
    assert torch.allclose(out_inference, out)
    assert training_model.deviceToHostCounter == 1, \
            "1 implicit copy after having trained the model"
    training_model.deviceToHostCounter = 0  # reset counter

    out_inference = inference_model(input)
    assert torch.allclose(out_inference, out)
    assert training_model.deviceToHostCounter == 0, \
            "No implicit copy needed after inference"

    # Check we have trained the "model"
    assert torch.allclose(out_inference, target, rtol=1e-02, atol=1e-02)
Example 22
def test_weight_update_replicas(process_id=0, num_processes=1):
    localReplicationFactor = 2

    opts = poptorch.Options()
    opts.replicationFactor(localReplicationFactor)
    opts.Distributed.configureProcessId(process_id, num_processes)

    replicationFactor = localReplicationFactor * opts.Distributed.numProcesses

    np.random.seed(42)

    A = np.random.rand(2, 4).astype(np.float32)
    B = np.ones((4, 6)).astype(np.float32)
    C = np.random.rand(2, 6).astype(np.float32)

    alpha = np.random.random(1).astype(np.float32)[0]
    beta = np.random.random(1).astype(np.float32)[0]

    class Model(torch.nn.Module):
        def __init__(self):
            super().__init__()

            self.b = torch.tensor(B, requires_grad=True)
            self.c = torch.tensor(C, requires_grad=True)

            # Create the weight tensors for pytorch
            self.B = torch.nn.Parameter(self.b, requires_grad=True)
            self.C = torch.nn.Parameter(self.c, requires_grad=True)

            self.matmul = torch.matmul

        def forward(self, input):
            # Perform the GEMM operation
            x = alpha * self.matmul(input, self.B) + beta * self.C
            return x

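    # CPU reference: accumulate gradients over each replica's forward/backward pass, then apply a single optimizer step.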
    def reference():
        module = Model()
        module.train()

        optimizer = torch.optim.SGD(module.parameters(),
                                    lr=0.01,
                                    weight_decay=0.0,
                                    momentum=0.0)

        a = torch.tensor(A, requires_grad=True)
        optimizer.zero_grad()

        outputs = ()

        # Graph with gradient accumulation, i.e. only update the weights after x passes.
        for _ in range(replicationFactor):
            o = module(a)
            outputs = outputs + (o, )
            loss = torch.nn.L1Loss(reduction="mean")
            target = torch.zeros(o.size())
            output = loss(o, target)
            output.backward()

        # Update the weights
        optimizer.step()

        # Only keep the output slice corresponding to this process
        outputs = outputs[opts.Distributed.processId *
                          localReplicationFactor:][:localReplicationFactor]
        return [torch.cat(outputs), module.B.data, module.C.data]

    model = Model()
    poptorch_model = helpers.trainingModelWithLoss(
        model,
        options=opts,
        loss=torch.nn.L1Loss(reduction="mean"),
        optimizer=torch.optim.SGD(model.parameters(),
                                  lr=0.01,
                                  weight_decay=0.0,
                                  momentum=0.0))

    ref_out = reference()
    ipu_A = np.concatenate([A for _ in range(localReplicationFactor)])

    target = torch.zeros(2 * localReplicationFactor, 6)
    output, _ = poptorch_model(torch.tensor(ipu_A, requires_grad=True), target)
    out = [output, model.B.data, model.C.data]
    for idx, ref in enumerate(ref_out):
        print("Validating output %d" % idx)
        torch.testing.assert_allclose(out[idx], ref, rtol=1e-03, atol=1e-03)
Example 23
def test_weights_sharing_ipu_cpu():
    torch.manual_seed(42)
    model = torch.nn.Linear(10, 10)

    training_model = helpers.trainingModelWithLoss(model,
                                                   loss=torch.nn.MSELoss())
    training_model.deviceToHostCounter = 0
    realMethod = training_model.copyWeightsToHost

    original_parameters = str(list(model.parameters()))

    def deviceToHostWrapper(model):
        model.deviceToHostCounter += 1
        realMethod()

    training_model.copyWeightsToHost = types.MethodType(
        deviceToHostWrapper, training_model)

    # The training wrapper shares weights with the host 'model', so they can be copied back once training is finished.
    target = torch.randn(10)
    input = torch.randn(10)

    # Make sure the first run doesn't already pass the test.
    original, _ = training_model(input, target)
    assert not torch.allclose(original, target, rtol=1e-02, atol=1e-02)

    # Train on IPU.
    for _ in range(0, 100):
        out, _ = training_model(input, target)

    assert training_model.deviceToHostCounter == 0, \
            "No implicit copy needed to train the model"

    # Run without copying the weights and check they've been automatically updated.
    nativeOut = model(input)
    assert torch.allclose(nativeOut, out)
    assert training_model.deviceToHostCounter == 1, \
            "1 implicit copy after having trained the model"
    training_model.deviceToHostCounter = 0  # reset counter

    current_parameters = str(list(model.parameters()))
    assert original_parameters != current_parameters
    assert training_model.deviceToHostCounter == 0, \
            "No implicit copy needed to access the parameters after inference"
    last_parameters = current_parameters

    nativeOut = model(input)
    assert torch.allclose(nativeOut, out)
    assert training_model.deviceToHostCounter == 0, \
            "No implicit copy needed after inference"

    current_parameters = str(list(model.parameters()))
    assert last_parameters == current_parameters
    assert training_model.deviceToHostCounter == 0, \
            "No implicit copy needed to access the parameters after inference"

    # Train on IPU.
    for _ in range(0, 50):
        out, _ = training_model(input, target)

    current_parameters = str(list(model.parameters()))
    assert training_model.deviceToHostCounter == 1, \
            "1 implicit copy after having trained the model"
    assert original_parameters != current_parameters
    training_model.deviceToHostCounter = 0  # reset counter

    for _ in range(0, 50):
        out, _ = training_model(input, target)

    # Access a parameter directly:
    print(model.weight.data)

    assert training_model.deviceToHostCounter == 1, \
            "1 implicit copy after having trained the model"
    training_model.deviceToHostCounter = 0  # reset counter

    for _ in range(0, 50):
        out, _ = training_model(input, target)

    # Check state_dict works: torch.save(model.state_dict(), "/tmp/model.save")
    model.state_dict()

    assert training_model.deviceToHostCounter == 1, \
            "1 implicit copy after having trained the model"
    training_model.deviceToHostCounter = 0  # reset counter

    for _ in range(0, 50):
        out, _ = training_model(input, target)

    assert training_model.deviceToHostCounter == 0, \
            "No implicit copy needed to train the model"

    # Run without copying the weights and check they've been automatically updated.
    nativeOut = model(input)
    assert torch.allclose(nativeOut, out)
    assert training_model.deviceToHostCounter == 1, \
            "1 implicit copy after having trained the model"
    training_model.deviceToHostCounter = 0  # reset counter

    nativeOut = model(input)
    assert torch.allclose(nativeOut, out)
    assert training_model.deviceToHostCounter == 0, \
            "No implicit copy needed after inference"

    # Check we have trained the "model"
    assert torch.allclose(nativeOut, target, rtol=1e-02, atol=1e-02)