import functools
from copy import deepcopy

import pytest
import torch

# Assumed import path for the mixed-precision Adam under test; adjust to
# match the module that defines `Adam` and `Precision` (e.g. fairscale.optim.adam).
from fairscale.optim import Adam, Precision


def test_exploding_optimizer_state():
    # An infinite weight produces non-finite gradients and optimizer state in
    # pure FP16, which the optimizer should detect and raise as a RuntimeError.
    weight = torch.tensor([[float("inf")]]).half().cuda().requires_grad_()
    input = torch.tensor([1.0]).half().cuda().requires_grad_()

    optimizer = Adam([weight], lr=1e-3, precision=Precision.PURE_FP16)
    optimizer._optim_scale = 1.0

    optimizer.zero_grad()
    loss = (weight.mv(input)).pow(2).sum()
    loss.backward()

    with pytest.raises(RuntimeError):
        optimizer.step()
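# `make_half_precision_params` is used below but not defined in this section.
# A minimal sketch of what it plausibly returns, assuming small random FP16
# tensors on CUDA (the exact shapes and values are an assumption):
def make_half_precision_params():
    weight = torch.randn(10, 5).half().cuda().requires_grad_()
    bias = torch.randn(10).half().cuda().requires_grad_()
    input = torch.randn(5).half().cuda()
    return weight, bias, input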
def test_update_optim_scale():
    # With an update frequency of 1, a single clean step should double the
    # optimizer's loss scale, from 2**15 to 2**16.
    weight, bias, input = make_half_precision_params()
    optimizer = Adam([weight, bias], lr=1e-3, precision=Precision.PURE_FP16)
    optimizer._optim_scale_update_freq = 1
    optimizer._optim_scale = 2**15

    optimizer.zero_grad()
    loss = (weight.mv(input) + bias).pow(2).sum()
    loss.backward()
    optimizer.step()

    assert optimizer._optim_scale == 2**16
def state_dict_test(optimizer, weight, bias, input):
    def fn_base(optimizer, weight, bias, input):
        optimizer.zero_grad()
        loss = (weight.mv(input) + bias).pow(2).sum()
        loss.backward()
        return loss

    fn = functools.partial(fn_base, optimizer, weight, bias, input)

    # Prime the optimizer
    for _i in range(5):
        optimizer.step(fn)

    # Clone the weights and construct a new optimizer for them
    weight_c = weight.data.clone().requires_grad_()
    bias_c = bias.data.clone().requires_grad_()
    optimizer_c = Adam([weight_c, bias_c], lr=1e-3, precision=optimizer.precision)
    fn_c = functools.partial(fn_base, optimizer_c, weight_c, bias_c, input)

    # Load state dict
    state_dict = deepcopy(optimizer.state_dict())
    optimizer_c.load_state_dict(state_dict)

    for group, group_c in zip(optimizer.param_groups, optimizer_c.param_groups):
        for p, p_c in zip(group["params"], group_c["params"]):
            assert torch.equal(optimizer.state[p]["exp_avg"], optimizer_c.state[p_c]["exp_avg"])
            assert torch.equal(optimizer.state[p]["exp_avg_sq"], optimizer_c.state[p_c]["exp_avg_sq"])

    if optimizer.fp32_param_groups:
        # When using mixed precision, fp32_param_groups are made from FP16 params
        # rather than copied via state_dict, introducing differences between the
        # original optimizer and the copy. Because this test requires that they be
        # the exact same, we copy the fp32 params from the original optimizer to
        # the copy.
        optimizer_c.fp32_param_groups = deepcopy(optimizer.fp32_param_groups)

    # Run both optimizations in parallel
    for _i in range(5):
        optimizer.step(fn)
        optimizer_c.step(fn_c)
        assert torch.equal(weight, weight_c)
        assert torch.equal(bias, bias_c)
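# A minimal sketch of how `state_dict_test` might be driven, assuming the
# `make_half_precision_params` helper above; the precision mode here is an
# illustrative choice, not necessarily the only one the suite exercises:
def test_state_dict_pure_fp16():
    weight, bias, input = make_half_precision_params()
    optimizer = Adam([weight, bias], lr=1e-3, precision=Precision.PURE_FP16)
    state_dict_test(optimizer, weight, bias, input)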