Beispiel #1
0
def _test_basic_func(rank, world_size, tempfile_name, test_case):
    """Check AdaScale's gain on a DDP-wrapped ``Linear(2, 2)``.

    ``test_case`` holds ``"expected_gain"`` plus either ``"input"`` (one
    iteration; the gain is checked after backward and before the optimizer
    step) or ``"inputs"`` (several iterations; the gain is checked once after
    the final step). Each input is indexed by ``rank`` to pick this worker's
    data.
    """
    _dist_init(rank, world_size, tempfile_name, backend="nccl")  # Covers nccl

    net = Linear(2, 2, bias=False)
    net.to("cuda")
    net = DDP(net, device_ids=[rank])
    adascale = AdaScale(SGD(net.parameters(), lr=0.1))

    def _forward_backward(per_rank_data):
        # Run this rank's slice through the wrapped model and backprop the summed output.
        batch = Tensor(per_rank_data[rank]).cuda()
        net(batch).sum().backward()

    def _check_gain():
        assert np.allclose(adascale.gain(),
                           test_case["expected_gain"]), adascale.gain()

    if "input" not in test_case:
        # Multiple iterations: step after every backward, verify at the very end.
        for per_rank_data in test_case["inputs"]:
            _forward_backward(per_rank_data)
            adascale.step()
            adascale.zero_grad()
        _check_gain()
    else:
        # Single iteration: verify between backward and step.
        _forward_backward(test_case["input"])
        _check_gain()
        adascale.step()
        adascale.zero_grad()

    dist.destroy_process_group()
Beispiel #2
0
    def test_one_iteration(self):
        """Test FSDP with uneven divide of parameter shards.

        Runs one forward/backward/step on this rank's row of a random batch
        and compares the output and the updated weight with the reference
        values returned by ``self._get_ref_results``.
        """
        lr = 0.1
        net = Linear(3, 3, bias=False)
        batch = torch.rand(8, 3)

        expected_out, expected_weight = self._get_ref_results(net, batch, lr)

        net.to(self.rank)
        net = FSDP(net)
        optim = SGD(net.parameters(), lr=lr)
        # Every rank needs its own row of the batch.
        self.assertTrue(len(batch) >= self.world_size)

        out = net(torch.Tensor(batch[self.rank]).to(self.rank))
        out.float().sum().backward()
        optim.step()
        optim.zero_grad()

        with net._summon_full_params():
            torch.cuda.synchronize()  # TODO: This is here because it was
            # originally part of get_full_params(), debug why it is needed here.
            weight_out = net.module.weight.T.clone()
            self.assertEqual(expected_out, out)
            self.assertEqual(expected_weight, weight_out)
Beispiel #3
0
def _test_create_supervised_trainer(
    model_device: Optional[str] = None,
    trainer_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
    scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False,
):
    """Train a zero-initialised ``Linear(1, 1)`` for one batch and check the outcome.

    When the model and trainer devices are compatible, the loss and updated
    parameters are compared with fixed reference values (plus dtype/scaler
    checks in ``"amp"`` mode). Otherwise, on torch >= 1.7.0, running the
    trainer is expected to raise a device-mismatch ``RuntimeError``.
    """
    net = Linear(1, 1)
    if model_device:
        net.to(model_device)

    net.weight.data.zero_()
    net.bias.data.zero_()
    optimizer = SGD(net.parameters(), 0.1)

    if trace:
        net = torch.jit.trace(net, torch.randn(1, 1))

    if amp_mode == "apex" and model_device == trainer_device == "cuda":
        from apex import amp

        net, optimizer = amp.initialize(net, optimizer, opt_level="O2")

    trainer = create_supervised_trainer(
        net,
        optimizer,
        mse_loss,
        device=trainer_device,
        output_transform=lambda x, y, y_pred, loss: (y_pred, loss.item()),
        amp_mode=amp_mode,
        scaler=scaler,
    )

    data = [(torch.tensor([[0.1], [0.2]]), torch.tensor([[0.3], [0.5]]))]

    # Building the trainer must not touch the parameters.
    assert net.weight.data[0, 0].item() == approx(0.0)
    assert net.bias.item() == approx(0.0)

    exactly_one_on_cpu = (model_device == "cpu") != (trainer_device == "cpu")
    if not (model_device == trainer_device or exactly_one_on_cpu):
        if LooseVersion(torch.__version__) >= LooseVersion("1.7.0"):
            # This is broken in 1.6.0 but will be probably fixed with 1.7.0
            with pytest.raises(RuntimeError, match=r"is on CPU, but expected them to be on GPU"):
                trainer.run(data)
        return

    state = trainer.run(data)

    assert state.output[-1] == approx(0.17), state.output[-1]
    assert round(net.weight.data[0, 0].item(), 3) == approx(0.013), net.weight.item()
    assert round(net.bias.item(), 3) == approx(0.08), net.bias.item()

    if amp_mode != "amp":
        return

    assert state.output[0].dtype is torch.half
    # The state carries a scaler only when ``scaler`` was passed as the literal True.
    if scaler is True:
        assert hasattr(state, "scaler")
    else:
        assert not hasattr(state, "scaler")
Beispiel #4
0
def _test_create_evaluation_step(
    mock_torch_cuda_amp_module,
    model_device: Optional[str] = None,
    evaluator_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
):
    """Run the plain supervised evaluation step once and check what got called.

    The output transform must be invoked, while the supplied
    ``mock_torch_cuda_amp_module`` must stay uncalled.
    """
    transform_spy = MagicMock()
    net = Linear(1, 1)
    if model_device:
        net.to(model_device)

    net.weight.data.zero_()
    net.bias.data.zero_()

    if trace:
        net = torch.jit.trace(net, torch.randn(1, 1))

    # Accept either a torch.device or a plain device string (or None).
    if isinstance(evaluator_device, torch.device):
        device_type = evaluator_device.type
    else:
        device_type = evaluator_device
    on_tpu = device_type is not None and "xla" in device_type
    mode, _ = _check_arg(on_tpu, amp_mode, None)

    step_fn = supervised_evaluation_step(net, evaluator_device, output_transform=transform_spy)

    data = [(torch.tensor([[1.0], [2.0]]), torch.tensor([[3.0], [5.0]]))]
    engine = Engine(step_fn)
    engine.run(data)

    assert not mock_torch_cuda_amp_module.called
    assert transform_spy.called
Beispiel #5
0
def _test_apex_average(device, amp_mode, opt_level):
    """Check ``VariableAccumulation`` after apex has been initialised.

    Sums 100 random scalars one by one through the accumulator and compares
    the accumulated value and the count with the direct tensor sum and length.
    Only valid for ``amp_mode == "apex"`` on ``"cuda"``.
    """
    assert amp_mode == "apex"
    assert device == "cuda"

    net = Linear(1, 1)
    if device:
        net.to(device)

    net.weight.data.zero_()
    net.bias.data.zero_()
    optimizer = SGD(net.parameters(), 0.1)

    from apex import amp

    net, optimizer = amp.initialize(net, optimizer, opt_level=opt_level)

    accumulator = VariableAccumulation(lambda a, x: a + x)
    samples = torch.rand(100).float().to(device)
    for sample in samples:
        accumulator.update(sample)

    total, count = accumulator.compute()
    assert total.item() == pytest.approx(samples.sum().item())
    assert count == len(samples)
def _default_create_supervised_evaluator(
    model_device: Optional[str] = None,
    evaluator_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
):
    """Build a zero-initialised ``Linear(1, 1)`` and a supervised evaluator for it.

    Returns:
        ``(model, evaluator)``; the model is the traced module when ``trace``
        is set.
    """
    net = Linear(1, 1)
    if model_device:
        net.to(model_device)

    for param in (net.weight, net.bias):
        param.data.zero_()

    if trace:
        net = torch.jit.trace(net, torch.randn(1, 1))

    engine = create_supervised_evaluator(net, device=evaluator_device, amp_mode=amp_mode)

    # Creating the evaluator must leave the parameters untouched.
    assert net.weight.data[0, 0].item() == approx(0.0)
    assert net.bias.item() == approx(0.0)

    return net, engine
Beispiel #7
0
def _test_grad_accum_func(rank, world_size, tempfile_name):
    """Check AdaScale's gain when two gradients are accumulated per step.

    Each rank feeds one-hot inputs at index ``rank`` and then ``rank + 2``;
    the first backward runs under ``no_sync()``. The test asserts a gain of
    exactly 4.0.
    """
    _dist_init(rank, world_size, tempfile_name, backend="gloo")  # Covers gloo

    net = Linear(4, 2, bias=False)
    net.to("cuda")
    net = DDP(net, device_ids=[rank])
    optim = AdaScale(SGD(net.parameters(), lr=0.1), num_gradients_to_accumulate=2)

    def _one_hot(index):
        # Length-4 vector with a single 1.0 at ``index``, moved to the GPU.
        vec = Tensor([0.0] * 4)
        vec[index] = 1.0
        return vec.cuda()

    with net.no_sync():
        # iter 1, input vectors are pointing dim0 and dim1
        net(_one_hot(rank)).sum().backward()

    # iter 2, input vectors are pointing dim2 and dim3
    net(_one_hot(rank + 2)).sum().backward()

    # since all inputs are orthogonal, the gain should be exactly 4.0.
    assert np.allclose(optim.gain(), 4.0), optim.gain()
    optim.step()
    optim.zero_grad()

    dist.destroy_process_group()
Beispiel #8
0
def _test_create_mocked_supervised_trainer(
    model_device: Optional[str] = None,
    trainer_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
    scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False,
):
    """Check that ``create_supervised_trainer`` picks the right training-step factory.

    All four step factories in ``ignite.engine`` are patched; after one run
    the factory matching the mode reported by ``_check_arg`` must have been
    called.
    """
    with mock.patch("ignite.engine.supervised_training_step_amp") as training_step_amp_mock, \
            mock.patch("ignite.engine.supervised_training_step_apex") as training_step_apex_mock, \
            mock.patch("ignite.engine.supervised_training_step_tpu") as training_step_tpu_mock, \
            mock.patch("ignite.engine.supervised_training_step") as training_step_mock:
        net = Linear(1, 1)
        if model_device:
            net.to(model_device)

        net.weight.data.zero_()
        net.bias.data.zero_()
        optimizer = SGD(net.parameters(), 0.1)

        if trace:
            net = torch.jit.trace(net, torch.randn(1, 1))

        if amp_mode == "apex" and model_device == trainer_device == "cuda":
            from apex import amp

            net, optimizer = amp.initialize(net, optimizer, opt_level="O2")

        trainer = create_supervised_trainer(
            net,
            optimizer,
            mse_loss,
            device=trainer_device,
            output_transform=lambda x, y, y_pred, loss: (y_pred, loss.item()),
            amp_mode=amp_mode,
            scaler=scaler,
        )

        data = [(torch.tensor([[0.1], [0.2]]), torch.tensor([[0.3], [0.5]]))]

        assert net.weight.data[0, 0].item() == approx(0.0)
        assert net.bias.item() == approx(0.0)

        on_tpu = trainer_device is not None and "xla" in trainer_device
        mode, _ = _check_arg(on_tpu, amp_mode, scaler)

        exactly_one_on_cpu = (model_device == "cpu") != (trainer_device == "cpu")
        if not (model_device == trainer_device or exactly_one_on_cpu):
            # Incompatible device pairing: nothing is run or checked.
            return

        trainer.run(data)

        # Any mode without a dedicated factory falls back to the plain training step.
        step_mock_by_mode = {
            "amp": training_step_amp_mock,
            "apex": training_step_apex_mock,
            "tpu": training_step_tpu_mock,
        }
        assert step_mock_by_mode.get(mode, training_step_mock).called
Beispiel #9
0
def _test_mocked_supervised_evaluator(
    model_device: Optional[str] = None,
    evaluator_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
):
    """Check which evaluation-step factory ``create_supervised_evaluator`` uses.

    With both factories patched, ``amp_mode == "amp"`` must call only the amp
    variant; any other mode must call only the plain one. Nothing is run when
    the device pairing is incompatible.
    """
    with mock.patch("ignite.engine.supervised_evaluation_step") as evaluation_step, \
            mock.patch("ignite.engine.supervised_evaluation_step_amp") as evaluation_step_amp:
        net = Linear(1, 1)
        if model_device:
            net.to(model_device)

        net.weight.data.zero_()
        net.bias.data.zero_()

        if trace:
            net = torch.jit.trace(net, torch.randn(1, 1))

        engine = create_supervised_evaluator(net, device=evaluator_device, amp_mode=amp_mode)

        data = [(torch.tensor([[1.0], [2.0]]), torch.tensor([[3.0], [5.0]]))]

        exactly_one_on_cpu = (model_device == "cpu") != (evaluator_device == "cpu")
        if not (model_device == evaluator_device or exactly_one_on_cpu):
            return

        engine.run(data)

        if amp_mode == "amp":
            used, unused = evaluation_step_amp, evaluation_step
        else:
            used, unused = evaluation_step, evaluation_step_amp
        assert used.called
        assert not unused.called
Beispiel #10
0
def _test_basic_func(rank,
                     world_size,
                     tempfile_name,
                     test_case,
                     oss,
                     model=None):
    """Train a DDP model under one of four AdaScale/OSS combinations and verify it.

    ``oss`` selects the optimizer stack: ``"none"``, ``"ada-oss"``,
    ``"wrapper-oss"`` or ``"oss-wrapper"``. ``test_case`` provides ``"input"``
    or ``"inputs"`` (indexed by ``rank``) and, optionally,
    ``"expected_gain"`` and ``"expected_mean_weight"``. If ``model`` is not
    given, a ``Linear(2, 2)`` with zeroed bias is used.
    """
    _dist_init(rank, world_size, tempfile_name, backend="nccl")

    if model is None:
        model = Linear(2, 2)
        model.bias.data.fill_(0.0)

    model.to("cuda")
    model = DDP(model, device_ids=[rank])

    assert oss in ["none", "ada-oss", "wrapper-oss", "oss-wrapper"]

    def _plain_adascale(params):
        return AdaScale(SGD(params, lr=0.1))

    builders = {
        "ada-oss": lambda params: AdaScale(OSS(params, SGD, lr=0.1)),
        "wrapper-oss": lambda params: AdaScaleWrapper(params,
                                                      optim_cls=OSS,
                                                      optim=SGD,
                                                      lr=0.1),
        "oss-wrapper": lambda params: OSS(params, AdaScaleWrapper, optim_cls=SGD, lr=0.1),
    }
    build = builders.get(oss)
    if build is None:
        assert oss == "none"
        build = _plain_adascale
    optim = build(model.parameters())

    # A single-iteration case is treated as a one-element list of inputs.
    inputs = [test_case["input"]] if "input" in test_case else test_case["inputs"]

    for per_rank_data in inputs:
        batch = Tensor(per_rank_data[rank]).cuda()
        model(batch).sum().backward()
        optim.step()
        optim.zero_grad()

    if "expected_gain" in test_case:
        assert np.allclose(optim.gain(),
                           test_case["expected_gain"]), "{} vs {}".format(
                               optim.gain(), test_case["expected_gain"])

    if "expected_mean_weight" in test_case:
        # Averages the mean weight of the first four submodules of the wrapped model.
        per_layer_means = [
            model.module[i].weight.data.mean().item() for i in range(4)
        ]
        mean_weight = mean(per_layer_means)
        assert np.allclose(mean_weight,
                           test_case["expected_mean_weight"]), mean_weight

    dist.destroy_process_group()
def _test_mocked_supervised_evaluator(
    model_device: Optional[str] = None,
    evaluator_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
):
    """Check which evaluation-step factory ``create_supervised_evaluator`` uses.

    With both factories patched, ``amp_mode == "amp"`` must call only the amp
    variant and any other mode only the plain one. For an incompatible device
    pairing on torch >= 1.7.0, running the evaluator must raise a
    device-mismatch ``RuntimeError``.
    """
    with mock.patch("ignite.engine.supervised_evaluation_step") as evaluation_step, \
            mock.patch("ignite.engine.supervised_evaluation_step_amp") as evaluation_step_amp:
        net = Linear(1, 1)
        if model_device:
            net.to(model_device)

        net.weight.data.zero_()
        net.bias.data.zero_()

        if trace:
            net = torch.jit.trace(net, torch.randn(1, 1))

        engine = create_supervised_evaluator(net,
                                             device=evaluator_device,
                                             amp_mode=amp_mode)

        data = [(torch.tensor([[1.0], [2.0]]), torch.tensor([[3.0], [5.0]]))]

        exactly_one_on_cpu = (model_device == "cpu") != (evaluator_device == "cpu")
        if not (model_device == evaluator_device or exactly_one_on_cpu):
            if LooseVersion(torch.__version__) >= LooseVersion("1.7.0"):
                # This is broken in 1.6.0 but will be probably fixed with 1.7.0
                with pytest.raises(
                        RuntimeError,
                        match=r"is on CPU, but expected them to be on GPU"):
                    engine.run(data)
            return

        engine.run(data)

        if amp_mode == "amp":
            used, unused = evaluation_step_amp, evaluation_step
        else:
            used, unused = evaluation_step, evaluation_step_amp
        assert used.called
        assert not unused.called
Beispiel #12
0
class FFNN(nn.Module):
    """Stack of linear layers ending in a single-unit head.

    ``num_layers - 1`` square ``Linear(input_dim, input_dim)`` layers are
    applied back to back, followed by ``fc = Linear(input_dim, 1)``.

    Args:
        input_dim: size of the last dimension of the input.
        score: if true, ``forward`` applies GELU to the head output and drops
            dimension 1; otherwise the raw head output is returned.
        num_layers: total number of linear layers including the head; must be
            positive.
    """

    def __init__(self, input_dim=768, score=True, num_layers=4):
        assert num_layers > 0, "FFNN cannot have non-positive layers"
        super(FFNN, self).__init__()
        # nn.ModuleList registers the hidden layers as submodules. A plain
        # Python list would hide them from parameters(), state_dict() and
        # device moves, so they would never be trained or saved.
        self.layers = nn.ModuleList(
            [Linear(input_dim, input_dim) for _ in range(num_layers - 1)]
        )
        self.fc = Linear(input_dim, 1)
        self.gelu = nn.GELU()
        self.score = score

    def forward(self, x):
        """Apply the hidden layers, then the head (with GELU + squeeze when scoring)."""
        for layer in self.layers:
            x = layer(x)
        if self.score:
            return self.gelu(self.fc(x)).squeeze(1)
        return self.fc(x)

    def to(self, device=torch.device("cpu"), *args, **kwargs):
        """Move the module, defaulting to CPU when called without arguments.

        All submodules are registered, so the base implementation moves them;
        the override only exists to keep the CPU default. Returns ``self``.
        """
        return super(FFNN, self).to(device, *args, **kwargs)
def _test_create_supervised_evaluator(
    model_device: Optional[str] = None,
    evaluator_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
):
    """Evaluate a zero-initialised ``Linear(1, 1)`` on one batch and check the output.

    With compatible devices the predictions must be 0, the targets must pass
    through unchanged, and the parameters must stay at 0. Otherwise, on
    torch >= 1.7.0, the run is expected to raise a device-mismatch
    ``RuntimeError``.
    """
    net = Linear(1, 1)
    if model_device:
        net.to(model_device)

    net.weight.data.zero_()
    net.bias.data.zero_()

    if trace:
        net = torch.jit.trace(net, torch.randn(1, 1))

    engine = create_supervised_evaluator(net,
                                         device=evaluator_device,
                                         amp_mode=amp_mode)

    data = [(torch.tensor([[1.0], [2.0]]), torch.tensor([[3.0], [5.0]]))]

    exactly_one_on_cpu = (model_device == "cpu") != (evaluator_device == "cpu")
    if not (model_device == evaluator_device or exactly_one_on_cpu):
        if LooseVersion(torch.__version__) >= LooseVersion("1.7.0"):
            # This is broken in 1.6.0 but will be probably fixed with 1.7.0
            with pytest.raises(
                    RuntimeError,
                    match=r"is on CPU, but expected them to be on GPU"):
                engine.run(data)
        return

    y_pred, y_true = engine.run(data).output

    for row in range(2):
        assert y_pred[row, 0].item() == approx(0.0)
    for row, target in enumerate((3.0, 5.0)):
        assert y_true[row, 0].item() == approx(target)

    # Evaluation must not modify the parameters.
    assert net.weight.data[0, 0].item() == approx(0.0)
    assert net.bias.item() == approx(0.0)
Beispiel #14
0
 def __linear2tensor(linear: nn.Linear) -> Tensor:
     """Tabulate the layer's response to every one-hot input vector.

     Row ``i`` of the returned ``(in_features, out_features)`` tensor is
     ``linear`` applied to a sparse vector holding a single 1 at position
     ``i``. For a standard ``nn.Linear`` that is column ``i`` of the weight
     plus the bias, if the layer has one.

     Side effect: ``linear.to("cpu")`` moves the caller's module in place, so
     the layer ends up on the CPU.
     """
     linear = linear.to("cpu")
     # Allocated uninitialised; every row is overwritten in the loop below.
     tensor = Tensor(linear.in_features, linear.out_features)
     for i in range(linear.in_features):
         # Sparse one-hot of length in_features with the 1 at index i.
         idx = torch.sparse_coo_tensor([[i]], [1], [linear.in_features],
                                       dtype=torch.float)
         # NOTE(review): relies on nn.Linear accepting a sparse COO input - confirm
         # against the torch version in use.
         val = linear(idx)
         # NOTE(review): val presumably carries autograd history, so this in-place
         # write would attach the result to the graph - confirm that is intended.
         tensor[i] = val
     return tensor
Beispiel #15
0
def _test_basic_func(rank, world_size, tempfile_name):
    """Check AdaScale's gain for one iteration with per-rank one-hot inputs.

    Each rank feeds a length-2 one-hot vector with the 1 at index ``rank``;
    the gain reported after backward must be 2.0.
    """
    _dist_init(rank, world_size, tempfile_name, backend="nccl")  # Covers nccl

    net = Linear(2, 2, bias=False)
    net.to("cuda")
    net = DDP(net, device_ids=[rank])
    adascale = AdaScale(SGD(net.parameters(), lr=0.1))

    # iter 1
    one_hot = Tensor([0.0, 0.0])
    one_hot[rank] = 1.0
    net(one_hot.cuda()).sum().backward()
    assert np.allclose(adascale.gain(), 2.0), adascale.gain()
    adascale.step()
    adascale.zero_grad()

    dist.destroy_process_group()
Beispiel #16
0
def _test_basic_func(rank, ddp_cls, world_size, tempfile_name, test_case):
    """Run AdaScale with DDP, SDP or FSDP as the data-parallel wrapper.

    ``test_case`` holds ``"expected_gain"`` plus either ``"input"`` (one
    iteration) or ``"inputs"`` (several), each indexed by ``rank``. The gain
    is asserted only when ``ddp_cls`` is DDP; for the other wrappers the test
    just checks that training runs.
    """
    _dist_init(rank, world_size, tempfile_name, backend="nccl")  # Covers nccl

    net = Linear(2, 2)
    net.to("cuda")
    if ddp_cls is DDP:
        net = ddp_cls(net, device_ids=[rank])
        optim = AdaScale(SGD(net.parameters(), lr=0.1))
    elif ddp_cls is SDP:
        # SDP needs the sharded optimizer at construction time, so build it first.
        optim = AdaScale(OSS(net.parameters(), SGD, lr=0.1))
        net = ddp_cls(net, sharded_optimizer=optim)
    else:
        assert ddp_cls is FSDP, ddp_cls
        # Two cases:
        #    flatten=True : AdaScale wrapper must be after FSDP and it receives
        #                   a single grad tensor. It won't receive grad if
        #                   wrapped before.
        #    flatten=False: AdaScale can be both before or after FSDP.
        # So, it is better to do AdaScale after FSDP.
        net = ddp_cls(net, flatten_parameters=False)
        optim = AdaScale(SGD(net.parameters(), lr=0.1))

    def _forward_backward(per_rank_data):
        # Run this rank's slice through the wrapped model and backprop the summed output.
        batch = Tensor(per_rank_data[rank]).cuda()
        net(batch).sum().backward()

    def _check_gain_if_ddp():
        if ddp_cls is DDP:
            assert np.allclose(optim.gain(), test_case["expected_gain"]), optim.gain()

    if "input" not in test_case:
        # multiple iters: verify once after the last step
        for per_rank_data in test_case["inputs"]:
            _forward_backward(per_rank_data)
            optim.step()
            optim.zero_grad()
        _check_gain_if_ddp()
    else:
        # single iter: verify between backward and step
        _forward_backward(test_case["input"])
        _check_gain_if_ddp()
        optim.step()
        optim.zero_grad()

    dist.destroy_process_group()
Beispiel #17
0
def _test_create_supervised_trainer(model_device: Optional[str] = None,
                                    trainer_device: Optional[str] = None,
                                    trace: bool = False):
    """Train a zero-initialised ``Linear(1, 1)`` for one batch and check the outcome.

    With compatible devices the loss and the updated weight/bias are compared
    with fixed reference values. Otherwise, on torch >= 1.7.0, the run is
    expected to raise a device-mismatch ``RuntimeError``.
    """
    net = Linear(1, 1)
    if model_device:
        net.to(model_device)

    net.weight.data.zero_()
    net.bias.data.zero_()
    optimizer = SGD(net.parameters(), 0.1)

    if trace:
        net = torch.jit.trace(net, torch.randn(1, 1))

    trainer = create_supervised_trainer(net,
                                        optimizer,
                                        mse_loss,
                                        device=trainer_device)

    data = [(torch.tensor([[1.0], [2.0]]), torch.tensor([[3.0], [5.0]]))]

    # Building the trainer must not touch the parameters.
    assert net.weight.data[0, 0].item() == approx(0.0)
    assert net.bias.item() == approx(0.0)

    exactly_one_on_cpu = (model_device == "cpu") != (trainer_device == "cpu")
    if not (model_device == trainer_device or exactly_one_on_cpu):
        if LooseVersion(torch.__version__) >= LooseVersion("1.7.0"):
            # This is broken in 1.6.0 but will be probably fixed with 1.7.0
            with pytest.raises(
                    RuntimeError,
                    match=r"is on CPU, but expected them to be on GPU"):
                trainer.run(data)
        return

    state = trainer.run(data)

    assert state.output == approx(17.0)
    assert net.weight.data[0, 0].item() == approx(1.3)
    assert net.bias.item() == approx(0.8)
Beispiel #18
0
def _test_basic_func(rank,
                     world_size,
                     tempfile_name,
                     test_case,
                     oss,
                     model=None):
    """Train a DDP model with AdaScale (optionally over OSS) and verify the gain.

    ``test_case`` provides ``"expected_gain"``, either ``"input"`` or
    ``"inputs"`` (indexed by ``rank``) and, optionally,
    ``"expected_mean_weight"``. A truthy ``oss`` wraps AdaScale around an OSS
    optimizer instead of plain SGD. If ``model`` is not given, a bias-free
    ``Linear(2, 2)`` is used.
    """
    _dist_init(rank, world_size, tempfile_name, backend="nccl")

    if model is None:
        model = Linear(2, 2, bias=False)
    model.to("cuda")
    model = DDP(model, device_ids=[rank])

    # For now, we can only wrap AdaScale over OSS. If we do it the other way around,
    # AdaScale needs to take different parameter types, i.e. the parameter list, etc.
    if oss:
        inner_optim = OSS(model.parameters(), SGD, lr=0.1)
    else:
        inner_optim = SGD(model.parameters(), lr=0.1)
    optim = AdaScale(inner_optim)

    # A single-iteration case is treated as a one-element list of inputs.
    inputs = [test_case["input"]] if "input" in test_case else test_case["inputs"]

    for per_rank_data in inputs:
        batch = Tensor(per_rank_data[rank]).cuda()
        model(batch).sum().backward()
        optim.step()
        optim.zero_grad()

    assert np.allclose(optim.gain(), test_case["expected_gain"]), optim.gain()

    if "expected_mean_weight" in test_case:
        # Averages the mean weight of the first four submodules of the wrapped model.
        per_layer_means = [
            model.module[i].weight.data.mean().item() for i in range(4)
        ]
        mean_weight = mean(per_layer_means)
        assert np.allclose(mean_weight,
                           test_case["expected_mean_weight"]), mean_weight

    dist.destroy_process_group()
def _default_create_supervised_trainer(
    gradient_accumulation_steps: int = 1,
    model_device: Optional[str] = None,
    trainer_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
    scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False,
):
    """Build a bias-free, zero-weight ``Linear(1, 1)`` and a supervised trainer for it.

    Returns:
        ``(trainer, model)``; the model is the traced and/or apex-initialised
        module when those options apply.
    """
    net = Linear(1, 1, bias=False)
    if model_device:
        net.to(model_device)

    net.weight.data.zero_()
    optimizer = SGD(net.parameters(), 0.1)

    if trace:
        net = torch.jit.trace(net, torch.randn(1))

    if amp_mode == "apex" and model_device == trainer_device == "cuda":
        from apex import amp

        net, optimizer = amp.initialize(net, optimizer, opt_level="O2")

    trainer_kwargs = dict(
        device=trainer_device,
        output_transform=lambda x, y, y_pred, loss: (y_pred, loss.item()),
        amp_mode=amp_mode,
        scaler=scaler,
        gradient_accumulation_steps=gradient_accumulation_steps,
    )
    trainer = create_supervised_trainer(net, optimizer, mse_loss, **trainer_kwargs)

    # Building the trainer must not touch the weight.
    assert net.weight.data[0, 0].item() == approx(0.0)

    return trainer, net
def _test_create_supervised_trainer(model_device: Optional[str] = None,
                                    trainer_device: Optional[str] = None,
                                    trace: bool = False):
    """Train a zero-initialised ``Linear(1, 1)`` for one batch and check the outcome.

    With compatible devices the loss and the updated weight/bias are compared
    with fixed reference values; otherwise the run must raise a
    ``RuntimeError`` mentioning "device type".
    """
    net = Linear(1, 1)
    if model_device:
        net.to(model_device)

    for param in (net.weight, net.bias):
        param.data.zero_()
    optimizer = SGD(net.parameters(), 0.1)

    if trace:
        net = torch.jit.trace(net, torch.randn(1, 1))

    trainer = create_supervised_trainer(net,
                                        optimizer,
                                        mse_loss,
                                        device=trainer_device)

    features = torch.tensor([[1.0], [2.0]])
    targets = torch.tensor([[3.0], [5.0]])
    data = [(features, targets)]

    # Building the trainer must not touch the parameters.
    assert net.weight.data[0, 0].item() == approx(0.0)
    assert net.bias.item() == approx(0.0)

    exactly_one_on_cpu = (model_device == "cpu") != (trainer_device == "cpu")
    if not (model_device == trainer_device or exactly_one_on_cpu):
        with pytest.raises(RuntimeError, match=r"device type"):
            trainer.run(data)
        return

    state = trainer.run(data)

    assert state.output == approx(17.0)
    assert net.weight.data[0, 0].item() == approx(1.3)
    assert net.bias.item() == approx(0.8)
def _test_create_supervised_evaluator(model_device: Optional[str] = None,
                                      evaluator_device: Optional[str] = None,
                                      trace: bool = False):
    """Evaluate a zero-initialised ``Linear(1, 1)`` on one batch and check the output.

    With compatible devices the predictions must be 0, the targets must pass
    through unchanged, and the parameters must stay at 0; otherwise the run
    must raise a ``RuntimeError`` mentioning "device type".
    """
    net = Linear(1, 1)
    if model_device:
        net.to(model_device)

    for param in (net.weight, net.bias):
        param.data.zero_()

    if trace:
        net = torch.jit.trace(net, torch.randn(1, 1))

    engine = create_supervised_evaluator(net, device=evaluator_device)

    features = torch.tensor([[1.0], [2.0]])
    targets = torch.tensor([[3.0], [5.0]])
    data = [(features, targets)]

    exactly_one_on_cpu = (model_device == "cpu") != (evaluator_device == "cpu")
    if not (model_device == evaluator_device or exactly_one_on_cpu):
        with pytest.raises(RuntimeError, match=r"device type"):
            engine.run(data)
        return

    y_pred, y_true = engine.run(data).output

    for row in range(2):
        assert y_pred[row, 0].item() == approx(0.0)
    for row, target in enumerate((3.0, 5.0)):
        assert y_true[row, 0].item() == approx(target)

    # Evaluation must not modify the parameters.
    assert net.weight.data[0, 0].item() == approx(0.0)
    assert net.bias.item() == approx(0.0)
def test_create_supervised_evaluator_on_cuda():
    """Evaluate a zero-initialised ``Linear(1, 1)`` on CUDA and check the output.

    Predictions must be 0, targets must pass through unchanged, and the
    parameters must still be 0 afterwards.
    """
    device = "cuda"
    net = Linear(1, 1)
    net.to(device)
    for param in (net.weight, net.bias):
        param.data.zero_()

    engine = create_supervised_evaluator(net, device=device)

    data = [(torch.tensor([[1.0], [2.0]]), torch.tensor([[3.0], [5.0]]))]
    y_pred, y_true = engine.run(data).output

    for row in range(2):
        assert y_pred[row, 0].item() == approx(0.0)
    for row, target in enumerate((3.0, 5.0)):
        assert y_true[row, 0].item() == approx(target)

    assert net.weight.data[0, 0].item() == approx(0.0)
    assert net.bias.item() == approx(0.0)
Beispiel #23
0
    def test_one_iteration(self):
        """Test FSDP with uneven divide of parameter shards.

        Runs one forward/backward/step on this rank's row of a random batch,
        gathers the full parameters with ``get_full_params`` and compares the
        output and the updated weight with the reference values returned by
        ``self._get_ref_results``.
        """
        lr = 0.1
        net = Linear(3, 3, bias=False)
        batch = torch.rand(8, 3)

        expected_out, expected_weight = self._get_ref_results(net, batch, lr)

        net.to(self.rank)
        net = FSDP(net)
        optim = SGD(net.parameters(), lr=lr)
        # Every rank needs its own row of the batch.
        self.assertTrue(len(batch) >= self.world_size)

        out = net(torch.Tensor(batch[self.rank]).to(self.rank))
        out.float().sum().backward()
        optim.step()
        optim.zero_grad()

        get_full_params(net)
        weight_out = net.module.weight.T.clone()

        self.assertEqual(expected_out, out)
        self.assertEqual(expected_weight, weight_out)
def test_create_supervised_trainer_on_cuda():
    """Train a zero-initialised ``Linear(1, 1)`` on CUDA for one batch.

    The loss and the updated weight/bias are compared with fixed reference
    values.
    """
    device = "cuda"
    net = Linear(1, 1)
    net.to(device)
    for param in (net.weight, net.bias):
        param.data.zero_()

    optimizer = SGD(net.parameters(), 0.1)
    trainer = create_supervised_trainer(net, optimizer, mse_loss, device=device)

    data = [(torch.tensor([[1.0], [2.0]]), torch.tensor([[3.0], [5.0]]))]

    # Building the trainer must not touch the parameters.
    assert net.weight.data[0, 0].item() == approx(0.0)
    assert net.bias.item() == approx(0.0)

    state = trainer.run(data)

    assert state.output == approx(17.0)
    assert net.weight.data[0, 0].item() == approx(1.3)
    assert net.bias.item() == approx(0.8)
Beispiel #25
0
class Train():
    """Optimise the weight of a ``Linear(1200, 400)`` layer viewed as a 1x3x400x400 image.

    The layer is never applied to an input. Its weight is reshaped into an
    image and updated with SGD so that its feature maps approach those of
    ``./c.jpg`` and its Gram matrices approach those of ``./s.jpg``.
    """

    def __init__(self,
                 lr = 0.01,
                 pretrain = False):
        """Build the model, optimizer and scheduler and load both reference images.

        Args:
            lr: learning rate handed to SGD.
            pretrain: when truthy, ``load_model(self.model, m=1)`` is called
                before the optimizer is created.

        Raises:
            FileNotFoundError: if ``./c.jpg`` or ``./s.jpg`` cannot be read.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = Linear(1200,400).to(self.device)
        if pretrain:
            load_model(self.model,m=1)
        self.optimizer = SGD(self.model.parameters(),lr = lr)
        # Multiply the base learning rate by 0.98 ** epoch.
        self.scheduler = lr_scheduler.LambdaLR(self.optimizer,
                                               lr_lambda = lambda epoch : 0.98**epoch)
        self.img_c = self._load_image('./c.jpg', self.device)
        self.img_s = self._load_image('./s.jpg', self.device)

    @staticmethod
    def _load_image(path, device):
        # Read an image file into a (1, C, H, W) tensor scaled by 1/255 on ``device``.
        pixels = cv.imread(path)
        if pixels is None:
            # cv.imread reports a missing or unreadable file by returning None
            # instead of raising; fail here with a clear message.
            raise FileNotFoundError('cannot read image: {}'.format(path))
        return torch.Tensor(pixels/255.).permute(2,0,1).unsqueeze(0).to(device)

    def train(self, epoch_num, step_a_epoch, save_frq):
        """Run the optimisation loop.

        Args:
            epoch_num: number of epochs; the scheduler is stepped once per epoch.
            step_a_epoch: optimizer steps per epoch.
            save_frq: a checkpoint is considered every ``save_frq`` global
                steps and written only when the loss beats the best so far.
        """
        global_step = 0
        # Plain float, so the best loss does not keep a graph-attached tensor alive.
        min_loss = float('inf')
        self.model.train()
        for epoch in range(1, epoch_num + 1):
            for _ in range(step_a_epoch):
                loss1 = 0.0
                loss2 = 0.0
                self.optimizer.zero_grad()
                # The trainable weight itself is the generated image.
                img = self.model.weight.view(1,3,400,400)
                for j in range(2,4):
                    logit_cs = feature_map(img, j)
                    logit_c = feature_map(self.img_c, j)
                    loss1 += torch.sum((logit_cs-logit_c)**2)*0.5
                # NOTE(review): ``a`` is not defined in this class; presumably a
                # module-level sequence with one entry per Gram layer - confirm.
                for j in range(len(a)):
                    gram_ss,m,n = Gram(img, j)
                    gram_s,m,n = Gram(self.img_s, j)
                    loss2 += torch.sum((gram_ss-gram_s)**2)/(4*m**2*n**2)
                loss = loss1 + 0.01*loss2
                loss.backward()
                self.optimizer.step()
                if global_step%save_frq == 0:
                    loss_value = loss.item()
                    if loss_value < min_loss:
                        min_loss = loss_value
                        save_model(self.model,self.optimizer, self.scheduler, global_step, m = 1)
                        # NOTE(review): newer torch versions recommend get_last_lr();
                        # get_lr() is kept for compatibility with older versions.
                        print('learning rate:    {:6f}'.format(self.scheduler.get_lr()[0]))
                        print('step:    {:}    loss:    {:}'.format(global_step,loss))
                global_step += 1
            self.scheduler.step(epoch)
# Build the training and validation loaders.
print('Preparing training/validation loaders...')
train_loader = BunoDataloader(BunoDataset(train_bunos), train_batch_size)
val_loader = BunoDataloader(BunoDataset(val_bunos), val_batch_size)
print('Done!\n')

mission_feat_dim = train_loader.mission_feat_dim
maint_feat_dim = train_loader.maint_feat_dim

# Two single-output linear models, both starting from all-zero parameters.
mission_model = Linear(in_features=mission_feat_dim, out_features=1)
mission_model.weight.data.zero_()
mission_model.bias.data.zero_()
maint_model = Linear(in_features=maint_feat_dim, out_features=1)
maint_model.weight.data.zero_()
maint_model.bias.data.zero_()
mission_model.to(device)
maint_model.to(device)

# Optionally resume from a saved session.
if continue_fname:
    load_path = os.path.join('saved-sessions', continue_fname)
    print(f'Loading saved session from {load_path}...')

    # map_location remaps stored tensors onto the current device, so a
    # checkpoint saved on a GPU can still be resumed on a CPU-only host.
    ckpt = torch.load(load_path, map_location=device)
    mission_model.load_state_dict(ckpt['mission_state_dict'])
    maint_model.load_state_dict(ckpt['maint_state_dict'])
    train_losses = ckpt['train_losses']
    val_accs = ckpt['val_accs']
    time_elapsed = ckpt['time_elapsed']

    print('Done!\n')