Example #1
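All twelve examples below appear to be excerpted from a single onnxruntime test module exercising the onnxblock API, so they share one set of module-level imports. The block below is a reconstruction rather than part of the source; every name in it is inferred from the calls in the examples, and the alias `C` in particular is assumed to be onnxruntime's capi binding module.

# Assumed shared imports for all examples (reconstructed, not from the source).
import copy
import os
import random
import tempfile

import numpy as np
import onnx
import torch

import onnxruntime
from onnxruntime.capi import _pybind_state as C
from onnxruntime.training import onnxblock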
def test_adamw_optimizer_execution():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    pt_model, onnx_model = _get_models(device, batch_size, input_size,
                                       hidden_size, output_size)

    x = torch.randn(batch_size, input_size, device=device)
    target = torch.randn(batch_size, output_size, device=device)

    simple_model = SimpleTrainingModelWithMSELoss()
    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)

    optimizer = onnxblock.optim.AdamW()
    with onnxblock.onnx_model() as accessor:
        output_name = optimizer(simple_model.parameters())
        optimizer_model = accessor.model

    learning_rate = 0.001
    step = 1
    ort_output_names = [output_name]

    def mse_loss(prediction, target):
        loss = torch.nn.MSELoss()
        return loss(prediction, target)

    # When
    with tempfile.NamedTemporaryFile(suffix=".onnx") as onnx_fo:
        onnx.save(optimizer_model, onnx_fo.name)

        loss = mse_loss(pt_model(x), target)
        loss.backward()

        ort_inputs = {
            "learning_rate": np.full(1, learning_rate, dtype=np.float32),
            "step": np.full(1, step, dtype=np.int64),
            "params": [],
            "first_order_moments": [],
            "second_order_moments": [],
        }
        for name, param in pt_model.named_parameters():
            ort_inputs["params"].append(_to_numpy(copy.deepcopy(param)))
            ort_inputs[f"{name}_grad"] = _to_numpy(copy.deepcopy(param.grad))
            ort_inputs["first_order_moments"].append(
                _to_numpy(torch.zeros_like(param)))
            ort_inputs["second_order_moments"].append(
                _to_numpy(torch.zeros_like(param)))

        # Then no error occurs when executing the model
        ort_session = onnxruntime.InferenceSession(
            onnx_fo.name, providers=C.get_available_providers())
        _ = ort_session.run(ort_output_names, ort_inputs)
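The helpers `_get_models`, `_get_onnx_model`, and `_to_numpy` are used throughout but never shown. Below is a minimal sketch of what they plausibly look like, assuming a two-layer fully connected network exported with torch.onnx.export; the class name `SimpleNet` and all structural details are assumptions, with only the `fc1`/`fc2` parameter names and the `input-0` input name inferred from Examples #2 and #11.

# Sketch of the unshown helpers (assumed, not from the source).
import io

class SimpleNet(torch.nn.Module):
    # Assumed two-layer MLP; "fc1"/"fc2" match the parameter names in Example #11.
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))

def _to_numpy(tensor):
    # Detach first so tensors that require grad can be converted.
    return tensor.detach().cpu().numpy()

def _get_onnx_model(pt_model, model_inputs):
    # Export to an in-memory ONNX model; the "input-N" naming matches Example #2.
    f = io.BytesIO()
    torch.onnx.export(
        pt_model, model_inputs, f,
        input_names=[f"input-{i}" for i in range(len(model_inputs))],
        output_names=["output"],
    )
    return onnx.load_model_from_string(f.getvalue())

def _get_models(device, batch_size, input_size, hidden_size, output_size, zero_flag=False):
    pt_model = SimpleNet(input_size, hidden_size, output_size).to(device)
    if zero_flag:
        # Zeroed parameters let Example #8 verify that loading a checkpoint restores values.
        with torch.no_grad():
            for param in pt_model.parameters():
                param.zero_()
    x = torch.randn(batch_size, input_size, device=device)
    return pt_model, _get_onnx_model(pt_model, (x,))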
Example #2
def test_set_requires_grad_on_inputs():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    _, onnx_model = _get_models(device, batch_size, input_size, hidden_size,
                                output_size)

    # When
    simple_model = SimpleTrainingModelWithMSELoss()
    simple_model.requires_grad("input-0")
    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)

    # Then
    expected_input_gradient_buffer_name = "input-0_grad.accumulation.buffer"
    expected_input_gradient_output_name = "input-0_grad.accumulation.out"
    graph_input_names = {
        graph_input.name
        for graph_input in onnx_model.graph.input
    }
    graph_output_names = {
        graph_output.name
        for graph_output in onnx_model.graph.output
    }

    assert expected_input_gradient_buffer_name in graph_input_names
    assert expected_input_gradient_output_name in graph_output_names
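`SimpleTrainingModelWithMSELoss` is likewise never defined in these excerpts. A plausible definition, assuming the onnxblock API of this vintage in which a training block subclasses `onnxblock.TrainingModel` (that base class is an inference from the gradient-accumulation outputs checked above) and composes a loss block in `build`:

# Assumed definition (not from the source).
class SimpleTrainingModelWithMSELoss(onnxblock.TrainingModel):
    def __init__(self):
        super().__init__()
        self.loss = onnxblock.loss.MSELoss()

    def build(self, output_name):
        # Attach an MSE loss to the base model's output; the enclosing
        # `with onnxblock.onnx_model(...)` then augments the graph in place.
        return self.loss(output_name)

`SimpleTrainingModelWithBCEWithLogitsLoss` (Example #4) presumably follows the same pattern with a BCE-with-logits loss block.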
Example #3
def test_retrieve_parameters():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    pt_model, onnx_model = _get_models(device, batch_size, input_size,
                                       hidden_size, output_size)

    simple_model = SimpleTrainingModelWithMSELoss()
    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)

    # When
    trainable_params, non_trainable_params = simple_model.parameters()

    # Then
    assert not non_trainable_params
    for ort_param, (pt_param_name,
                    pt_param) in zip(trainable_params,
                                     pt_model.named_parameters()):
        assert ort_param.name == pt_param_name
        assert np.allclose(
            np.frombuffer(ort_param.raw_data,
                          dtype=np.float32).reshape(pt_param.shape),
            _to_numpy(pt_param),
        )
Example #4
def test_bcewithlogits_loss_training_graph_execution():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    pt_model, onnx_model = _get_models(device, batch_size, input_size,
                                       hidden_size, output_size)

    x = torch.randn(batch_size, input_size, device=device)
    target = torch.randn(batch_size, output_size, device=device)

    # Build the onnx model with loss
    simple_model = SimpleTrainingModelWithBCEWithLogitsLoss()
    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)

    ort_output_names = _get_training_ort_output_names(pt_model, onnx_model)
    ort_inputs = _get_training_ort_inputs(x, target, pt_model, onnx_model)

    def bcewithlogits_loss(prediction, target):
        loss = torch.nn.BCEWithLogitsLoss()
        return loss(prediction, target)

    # When
    with tempfile.NamedTemporaryFile(suffix=".onnx") as onnx_fo:
        onnx.save(onnx_model, onnx_fo.name)
        ort_session = onnxruntime.InferenceSession(
            onnx_fo.name, providers=C.get_available_providers())

        ort_outs = ort_session.run(ort_output_names, ort_inputs)
        torch_outs = bcewithlogits_loss(pt_model(x), target)
        torch_outs.backward()

        # Then
        # assert loss is close
        assert np.allclose(ort_outs[0], _to_numpy(torch_outs))
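`_get_training_ort_output_names` and `_get_training_ort_inputs` are also unshown. The sketches below are consistent with the gradient-accumulation names seen in Example #2, but the exact signatures and the zero-initialized buffers are assumptions:

# Assumed training-graph helpers (reconstructed, not from the source).
def _get_training_ort_output_names(pt_model, onnx_model):
    # The loss output plus one accumulated-gradient output per parameter.
    names = [onnx_model.graph.output[0].name]
    for name, _ in pt_model.named_parameters():
        names.append(f"{name}_grad.accumulation.out")
    return names

def _get_training_ort_inputs(x, target, pt_model, onnx_model):
    ort_inputs = {
        onnx_model.graph.input[0].name: _to_numpy(copy.deepcopy(x)),
        onnx_model.graph.input[1].name: _to_numpy(copy.deepcopy(target)),
    }
    # One zero-initialized accumulation buffer per parameter gradient.
    for name, param in pt_model.named_parameters():
        ort_inputs[f"{name}_grad.accumulation.buffer"] = _to_numpy(torch.zeros_like(param))
    return ort_inputs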
Example #5
def test_adamw_optimizer_composition(graph, grad_clipping):
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    _, onnx_model = _get_models(device, batch_size, input_size, hidden_size,
                                output_size)

    # When / Then no error occurs
    simple_model = graph()
    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)

    optimizer = onnxblock.optim.AdamW(clip_grad=grad_clipping)
    with onnxblock.onnx_model() as accessor:
        _ = optimizer(simple_model.parameters())
        optimizer_model = accessor.model
        assert optimizer_model
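Examples #5-#7 take arguments (`graph`, `grad_clipping`, `model_type`) that are never bound in these excerpts; they are presumably supplied by pytest parametrization in the original module. A hypothetical decoration consistent with the test body above; the value lists are invented for illustration only:

# Hypothetical parametrization; the actual value lists are not in the source.
import pytest

@pytest.mark.parametrize("grad_clipping", [None, onnxblock.optim.ClipGradNorm(2.5)])
@pytest.mark.parametrize("graph", [SimpleTrainingModelWithMSELoss,
                                   SimpleTrainingModelWithBCEWithLogitsLoss])
def test_adamw_optimizer_composition(graph, grad_clipping):
    ...

`model_type` in Example #7 is presumably parametrized the same way, over onnxblock block base classes.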
Example #6
def test_loss_composition(graph):
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    _, onnx_model = _get_models(device, batch_size, input_size, hidden_size,
                                output_size)

    # When / Then no error occurs
    simple_model = graph()
    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)
Example #7
def test_weighted_average_model_composition(model_type):
    # Given
    class TwoOutputNet(torch.nn.Module):
        def __init__(self, input_size, hidden_size, num_classes):
            super(TwoOutputNet, self).__init__()

            self.fc1_1 = torch.nn.Linear(input_size, hidden_size)
            self.relu1 = torch.nn.ReLU()
            self.fc1_2 = torch.nn.Linear(hidden_size, num_classes)

            self.fc2_1 = torch.nn.Linear(input_size, hidden_size)
            self.relu2 = torch.nn.ReLU()
            self.fc2_2 = torch.nn.Linear(hidden_size, num_classes)

        def forward(self, model_input1, model_input2):
            out1 = self.fc1_2(self.relu1(self.fc1_1(model_input1)))
            out2 = self.fc2_2(self.relu2(self.fc2_1(model_input2)))
            return out1, out2

    class WeightedAvg(model_type):
        def __init__(self, w1, w2):
            super(WeightedAvg, self).__init__()

            self.loss1 = onnxblock.loss.CrossEntropyLoss()
            self.loss2 = onnxblock.loss.CrossEntropyLoss()
            self.w1 = onnxblock.building_blocks.Constant(w1)
            self.w2 = onnxblock.building_blocks.Constant(w2)
            self.mul = onnxblock.building_blocks.Mul()
            self.add = onnxblock.building_blocks.Add()

        def build(self, loss_input_name1, loss_input_name2):
            return self.add(
                self.mul(self.w1(),
                         self.loss1(loss_input_name1, labels_name="labels1")),
                self.mul(self.w2(),
                         self.loss2(loss_input_name2, labels_name="labels2")),
            )

    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    pt_model = TwoOutputNet(input_size, hidden_size, output_size).to(device)
    x1 = torch.randn(batch_size, input_size, device=device)
    x2 = torch.randn(batch_size, input_size, device=device)
    onnx_model = _get_onnx_model(pt_model, (x1, x2))

    # When / Then no error occurs
    weighted_model = WeightedAvg(random.random(), random.random())
    with onnxblock.onnx_model(onnx_model):
        _ = weighted_model(onnx_model.graph.output[0].name,
                           onnx_model.graph.output[1].name)
Example #8
def test_load_checkpoint():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    _, zero_onnx_model = _get_models(device,
                                     batch_size,
                                     input_size,
                                     hidden_size,
                                     output_size,
                                     zero_flag=True)
    for i in range(len(zero_onnx_model.graph.initializer)):
        zero_np = onnx.numpy_helper.to_array(
            zero_onnx_model.graph.initializer[i])
        assert np.allclose(zero_np, np.zeros(zero_np.shape))

    _, onnx_model = _get_models(device, batch_size, input_size, hidden_size,
                                output_size)

    # Copy of onnx_model for comparison
    onnx_model_copy = copy.deepcopy(onnx_model)

    simple_model = SimpleTrainingModelWithMSELoss()

    # When
    simple_model.requires_grad("fc2.weight", False)
    simple_model.requires_grad("fc1.bias", False)

    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)
    trainable_params, non_trainable_params = simple_model.parameters()

    with tempfile.TemporaryDirectory() as checkpoint_dir_name:
        checkpoint_file_path = os.path.join(checkpoint_dir_name, "checkpoint")
        onnxblock.save_checkpoint((trainable_params, non_trainable_params),
                                  checkpoint_file_path)

        # Load checkpoint parameters to the new simple model
        onnxblock.load_checkpoint_to_model(checkpoint_file_path,
                                           zero_onnx_model)

        # Then
        onnx_model_copy.graph.initializer.sort(key=lambda x: x.name)
        zero_onnx_model.graph.initializer.sort(key=lambda x: x.name)

        for i, _ in enumerate(onnx_model_copy.graph.initializer):
            onnx_np = onnx.numpy_helper.to_array(
                onnx_model_copy.graph.initializer[i])
            zero_np = onnx.numpy_helper.to_array(
                zero_onnx_model.graph.initializer[i])
            assert np.allclose(onnx_np, zero_np)
Example #9
def test_save_checkpoint():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    _, onnx_model = _get_models(device, batch_size, input_size, hidden_size,
                                output_size)

    simple_model = SimpleTrainingModelWithMSELoss()
    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)
    trainable_params, non_trainable_params = simple_model.parameters()

    # When
    with tempfile.TemporaryDirectory() as checkpoint_dir_name:
        checkpoint_file_path = os.path.join(checkpoint_dir_name, "checkpoint")
        onnxblock.save_checkpoint((trainable_params, non_trainable_params),
                                  checkpoint_file_path)
        # Then
        assert os.path.exists(checkpoint_file_path)
Example #10
def test_crossentropy_loss_execution():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    pt_model, onnx_model = _get_models(device, batch_size, input_size,
                                       hidden_size, output_size)

    x = torch.randn(batch_size, input_size, device=device)
    target = torch.randint(high=output_size,
                           size=(batch_size, ),
                           dtype=torch.int64,
                           device=device)

    # Build the onnx model with loss
    simple_model = SimpleModelWithCrossEntropyLoss()
    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)

    ort_output_names = [onnx_model.graph.output[0].name]
    ort_inputs = {
        onnx_model.graph.input[0].name:
        _to_numpy(copy.deepcopy(x)),
        onnx_model.graph.input[1].name:
        _to_numpy(copy.deepcopy(target).type(torch.int32)),
    }

    def crossentropy_loss(prediction, target):
        loss = torch.nn.CrossEntropyLoss()
        return loss(prediction, target)

    # When
    with tempfile.NamedTemporaryFile(suffix=".onnx") as onnx_fo:
        onnx.save(onnx_model, onnx_fo.name)
        ort_session = onnxruntime.InferenceSession(
            onnx_fo.name, providers=C.get_available_providers())

        ort_outs = ort_session.run(ort_output_names, ort_inputs)
        torch_outs = crossentropy_loss(pt_model(x), target)

        # Then
        assert np.allclose(ort_outs[0], _to_numpy(torch_outs))
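Note that the class name here drops "Training": `SimpleModelWithCrossEntropyLoss` is only asked for the forward loss value, and the test computes no gradients. A plausible definition by analogy with the training block sketched earlier; the `onnxblock.Model` base class (which does appear in Example #12) is an assumption here:

# Assumed definition (not from the source); a plain Model base is used because
# the test only reads the loss output and requests no gradient outputs.
class SimpleModelWithCrossEntropyLoss(onnxblock.Model):
    def __init__(self):
        super().__init__()
        self.loss = onnxblock.loss.CrossEntropyLoss()

    def build(self, output_name):
        return self.loss(output_name)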
Example #11
def test_set_requires_grad_on_parameters():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    _, onnx_model = _get_models(device, batch_size, input_size, hidden_size,
                                output_size)

    simple_model = SimpleTrainingModelWithMSELoss()

    # When
    simple_model.requires_grad("fc2.weight", False)
    simple_model.requires_grad("fc1.bias", False)

    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)
    trainable_params, non_trainable_params = simple_model.parameters()

    # Then
    expected_trainable_parameters = {"fc1.weight", "fc2.bias"}
    expected_non_trainable_parameters = {"fc2.weight", "fc1.bias"}
    for param in trainable_params:
        assert param.name in expected_trainable_parameters
    for param in non_trainable_params:
        assert param.name in expected_non_trainable_parameters
Example #12
def test_grad_clipping_execution():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    pt_model, _ = _get_models(device, batch_size, input_size, hidden_size,
                              output_size)
    x = torch.randn(batch_size, input_size, device=device)
    target = torch.randn(batch_size, output_size, device=device)

    # Prepare the onnx model with only grad clipping
    onnx_model = onnx.ModelProto()
    onnx_model.graph.name = "AdamW Optimizer Model"
    onnx_model.producer_name = "grad clipping test"
    onnx_model.opset_import.extend(onnxblock.optim.optim._OPSET_IMPORTS)
    onnx_model.ir_version = onnx.IR_VERSION

    class GradClippingModel(onnxblock.Model):
        def __init__(self, max_norm):
            super().__init__()
            self._grad_clip = onnxblock.optim.ClipGradNorm(max_norm)

        def build(self, *grad_names):
            return self._grad_clip(*grad_names)

    grad_names = []
    for name, param in pt_model.named_parameters():
        grad_names.append(f"{name}_grad")

        onnx_model.graph.input.append(
            onnx.helper.make_tensor_value_info(grad_names[-1],
                                               onnx.TensorProto.FLOAT,
                                               param.shape))

    grad_clip = GradClippingModel(2.5)

    with onnxblock.onnx_model(onnx_model):
        ort_output_names = grad_clip(*grad_names)

    def mse_loss(prediction, target):
        loss = torch.nn.MSELoss()
        return loss(prediction, target)

    # When
    with tempfile.NamedTemporaryFile(suffix=".onnx") as onnx_fo:
        onnx.save(onnx_model, onnx_fo.name)

        loss = mse_loss(pt_model(x), target)
        loss.backward()

        ort_inputs = {}
        for name, param in pt_model.named_parameters():
            ort_inputs[f"{name}_grad"] = _to_numpy(copy.deepcopy(param.grad))

        torch.nn.utils.clip_grad_norm_(pt_model.parameters(), 2.5)

        # Then no error occurs when executing the model
        ort_session = onnxruntime.InferenceSession(
            onnx_fo.name, providers=C.get_available_providers())
        ort_outs = ort_session.run(ort_output_names, ort_inputs)

        # assert all the gradients are close
        for ort_grad, pt_param in zip(ort_outs, pt_model.parameters()):
            assert np.allclose(ort_grad, _to_numpy(pt_param.grad))