Example 1: a unit test that checks flow.optim.lr_scheduler.LambdaLR against a hand-written reference, with one lambda per parameter group.
    def test_lambda_lr(test_case):
        optimizer = flow.optim.SGD(
            [
                {
                    "params": [Parameter(flow.Tensor([1.0]))]
                },
                {
                    "params": [Parameter(flow.Tensor([1.0]))]
                },
            ],
            lr=TestLrScheduler.base_lr,
        )
        lambdas = [lambda step: step // 30, lambda step: 0.95 * step]

        def lambda_lr_step(base_lrs, current_step):
            return [
                base_lr * lmbda(current_step)
                for base_lr, lmbda in zip(base_lrs, lambdas)
            ]

        lambda_lr = flow.optim.lr_scheduler.LambdaLR(optimizer,
                                                     lr_lambda=lambdas)

        for i in range(1, 21):
            lambda_lr.step()
            new_lrs = lambda_lr_step(lambda_lr.base_lrs, i)
            for lr1, lr2 in zip(lambda_lr.get_last_lr(), new_lrs):
                test_case.assertAlmostEqual(lr1, lr2, places=5)
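LambdaLR scales each parameter group's base learning rate by whatever its lambda returns for the current step, which is exactly what the reference lambda_lr_step above computes. A quick spot check of the two lambdas (hypothetical step 10, base_lr = 1.0):

# Hypothetical spot check, not part of the test itself.
lambdas = [lambda step: step // 30, lambda step: 0.95 * step]
print([1.0 * lmbda(10) for lmbda in lambdas])  # [0.0, 9.5]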
Example 2: the Module._apply helper, which recursively applies a function to child modules, parameters, parameter gradients, and buffers.
    def _apply(self, fn):
        for module in self.children():
            module._apply(fn)

        for key, param in self._parameters.items():
            if param is not None:
                assert isinstance(param, Parameter)
                assert param.is_leaf
                with flow.no_grad():
                    param_applied = fn(param)
                self._parameters[key] = Parameter(param_applied,
                                                  param.requires_grad)

                if param.grad is not None:
                    assert param.grad.is_leaf
                    with flow.no_grad():
                        grad_applied = fn(param.grad)
                    self._parameters[key].grad = grad_applied.requires_grad_(
                        param.grad.requires_grad)

        for key, buf in self._buffers.items():
            if buf is not None:
                self._buffers[key] = fn(buf)

        return self
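For context, _apply is the hook that whole-module tensor transforms funnel into. The sketch below is illustrative only; the halve_weights helper is hypothetical and it assumes flow.Tensor supports scalar multiplication, which the source does not show.

# Hypothetical usage sketch: forward one per-tensor function to _apply so it
# is applied to every parameter, gradient and buffer in the module tree.
def halve_weights(net):
    # Assumes net is a flow.nn.Module and flow.Tensor supports `t * 0.5`.
    return net._apply(lambda t: t * 0.5)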
Example 3: a training helper that runs flow.optim.RMSprop on a single parameter for a fixed number of iterations using precomputed gradients.
    def train_by_oneflow():
        x = Parameter(flow.Tensor(init_value))
        param_list = list()
        param_list.append(x)
        rmsprop = flow.optim.RMSprop(
            [{"params": param_list}],
            lr=learning_rate,
            momentum=momentum,
            scale=scale,
            alpha=alpha,
            eps=eps,
            weight_decay=weight_decay,
            centered=centered,
        )

        def train_one_iter(grad):
            grad_tensor = flow.Tensor(grad, requires_grad=False)
            loss = flow.sum(x * grad_tensor)
            loss.backward()
            rmsprop.step()
            rmsprop.zero_grad()

        for i in range(train_iters):
            train_one_iter(random_grad_seq[i])
        return x
Example 4: a test that compares flow.optim.lr_scheduler.CosineScheduler with a reference implementation of cosine decay.
class TestLrScheduler(flow.unittest.TestCase):
    base_lr = 1.0
    optimizer = flow.optim.SGD(
        [{"params": [Parameter(flow.Tensor([1.0]))]}], lr=base_lr
    )

    def test_cosine_scheduler(test_case):
        def cosine_scheduler_step(base_lr, current_step, steps, alpha):
            if current_step < steps:
                cos_decay = 0.5 * (1 +
                                   math.cos(math.pi * current_step / steps))
                decay_factor = (1 - alpha) * cos_decay + alpha
                return base_lr * decay_factor
            else:
                return base_lr * alpha

        alpha = 0.5
        steps = 10
        cosine_scheduler = flow.optim.lr_scheduler.CosineScheduler(
            TestLrScheduler.optimizer, steps=steps, alpha=alpha)

        for i in range(1, 21):
            cosine_scheduler.step()
            new_lr = cosine_scheduler_step(TestLrScheduler.base_lr, i, steps,
                                           alpha)
            test_case.assertAlmostEqual(cosine_scheduler.get_last_lr()[0],
                                        new_lr,
                                        places=4)
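As a quick sanity check of the decay formula with the values used above (base_lr = 1.0, steps = 10, alpha = 0.5), step 5 falls halfway through the schedule:

import math

# Hypothetical spot check of cosine_scheduler_step at step 5 of 10.
cos_decay = 0.5 * (1 + math.cos(math.pi * 5 / 10))  # 0.5
decay_factor = (1 - 0.5) * cos_decay + 0.5  # 0.75
print(decay_factor)  # scheduled lr at step 5 is 0.75 * base_lr = 0.75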
Example 5: a variant of Module._apply that wraps every result of fn in a Tensor (see the TODO comments).
    def _apply(self, fn):
        for module in self.children():
            module._apply(fn)

        for key, param in self._parameters.items():
            if param is not None:
                assert isinstance(param, Parameter)
                assert param.is_leaf
                with flow.no_grad():
                    # TODO(xuxiaoyu): remove Tensor convert after Tensor refactoring
                    param_applied = Tensor(fn(param))
                self._parameters[key] = Parameter(param_applied,
                                                  param.requires_grad)

                if param.grad is not None:
                    assert param.grad.is_leaf
                    with flow.no_grad():
                        # TODO(xuxiaoyu): remove Tensor convert after Tensor refactoring
                        grad_applied = Tensor(fn(param.grad))
                    self._parameters[key].grad = grad_applied.requires_grad_(
                        param.grad.requires_grad)

        for key, buf in self._buffers.items():
            if buf is not None:
                # TODO(xuxiaoyu): remove Tensor convert after Tensor refactoring
                self._buffers[key] = Tensor(fn(buf))

        return self
Example 6: a training helper that configures flow.optim.Adam through per-group options and runs it on a chosen device.
    def train_by_oneflow():
        x = Parameter(flow.Tensor(init_value, device=flow.device(device)))
        adam = flow.optim.Adam(
            [
                {
                    "params": [x],
                    "lr": learning_rate,
                    "betas": betas,
                    "eps": eps,
                    "weight_decay": weight_decay,
                    "scale": scale,
                }
            ]
        )

        def train_one_iter(grad):
            grad_tensor = flow.Tensor(
                grad, requires_grad=False, device=flow.device(device)
            )
            loss = flow.sum(x * grad_tensor)
            loss.backward()
            adam.step()
            adam.zero_grad()

        for i in range(train_iters):
            train_one_iter(random_grad_seq[i])
        return x
Example 7: the same pattern as above with flow.optim.RMSprop and per-group options.
    def train_by_oneflow():
        x = Parameter(flow.Tensor(init_value, device=flow.device(device)))
        param_list = list()
        param_list.append(x)
        rmsprop = flow.optim.RMSprop([{
            "params": param_list,
            "lr": learning_rate,
            "alpha": alpha,
            "eps": eps,
            "weight_decay": weight_decay,
            "momentum": momentum,
            "centered": centered,
            "scale": scale,
        }])

        def train_one_iter(grad):
            grad_tensor = flow.Tensor(grad,
                                      requires_grad=False,
                                      device=flow.device(device))
            loss = flow.sum(x * grad_tensor)
            loss.backward()
            rmsprop.step()
            rmsprop.zero_grad()

        for i in range(train_iters):
            train_one_iter(random_grad_seq[i])
        return x
Example 8: a test that checks flow.optim.lr_scheduler.StepLR against the closed form base_lr * gamma ** (step // step_size).
    def test_step_lr(test_case):
        optimizer = flow.optim.SGD(
            [{"params": [Parameter(flow.Tensor([1.0]))]}], lr=TestLrScheduler.base_lr
        )

        def step_lr_step(base_lr, current_step, step_size, gamma):
            return base_lr * (gamma ** (current_step // step_size))

        gamma = 0.1
        step_size = 5
        step_lr = flow.optim.lr_scheduler.StepLR(
            optimizer, step_size=step_size, gamma=gamma
        )

        for i in range(1, 21):
            step_lr.step()
            new_lr = step_lr_step(TestLrScheduler.base_lr, i, step_size, gamma)
            test_case.assertAlmostEqual(step_lr.get_last_lr()[0], new_lr, places=5)
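The closed form used by step_lr_step is easy to check by hand; with base_lr = 1.0, step_size = 5 and gamma = 0.1 the learning rate drops by a factor of ten every five steps:

# Hypothetical spot check of the StepLR closed form.
for step in [1, 5, 12, 20]:
    print(step, 1.0 * 0.1 ** (step // 5))  # roughly 1.0, 0.1, 0.01, 0.0001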
Example 9: a flow.optim.SGD training helper that backpropagates an explicit all-ones gradient instead of reducing the loss with flow.sum.
    def train_by_oneflow():
        x = Parameter(flow.Tensor(init_value))
        param_list = list()
        param_list.append(x)
        sgd = flow.optim.SGD(
            [{"param": param_list}], lr=learning_rate, momentum=momentum, scale=scale
        )

        def train_one_iter(grad):
            grad_tensor = flow.Tensor(grad, requires_grad=False)
            loss = x * grad_tensor
            # Workaround noted in the source (the flow.sum reduction is commented
            # out as a BUG): backpropagating an all-ones gradient through the
            # unreduced product gives x the same gradient as summing first.
            ones_grad = flow.Tensor(np.ones(list(loss.shape)))
            loss.backward(ones_grad)
            sgd.step()
            sgd.zero_grad()

        for i in range(train_iters):
            train_one_iter(random_grad_seq[i])
        return x
Example 10: a flow.optim.SGD training helper with device placement and a flow.sum-reduced loss.
    def train_by_oneflow():
        x = Parameter(flow.Tensor(init_value, device=flow.device(device)))
        sgd = flow.optim.SGD([{
            "params": [x],
            "lr": learning_rate,
            "momentum": momentum,
            "scale": scale
        }])

        def train_one_iter(grad):
            grad_tensor = flow.Tensor(grad,
                                      requires_grad=False,
                                      device=flow.device(device))
            loss = flow.sum(x * grad_tensor)
            loss.backward()
            sgd.step()
            sgd.zero_grad()

        for i in range(train_iters):
            train_one_iter(random_grad_seq[i])
        return x
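Because the loss is flow.sum(x * grad_tensor), the gradient of the loss with respect to x is exactly grad_tensor, so every iteration feeds one precomputed gradient to the optimizer. Helpers like train_by_oneflow are usually compared against a plain reference update; below is a minimal NumPy sketch under the assumption of vanilla SGD with momentum, scale and weight decay left neutral (the function name is hypothetical):

import numpy as np

def sgd_reference(init_value, random_grad_seq, learning_rate):
    # Mirrors the update the OneFlow helper above should perform when
    # momentum and scale have no effect: x <- x - lr * grad.
    x = np.array(init_value, dtype=np.float32)
    for grad in random_grad_seq:
        x -= learning_rate * np.array(grad, dtype=np.float32)
    return x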
Example 11: the same pattern as above with flow.optim.AdamW and weight decay.
    def train_by_oneflow():
        x = Parameter(flow.Tensor(init_value))
        param_list = list()
        param_list.append(x)
        adam = flow.optim.AdamW(
            [{"params": param_list}],
            lr=learning_rate,
            scale=scale,
            weight_decay=weight_decay,
        )

        def train_one_iter(grad):
            grad_tensor = flow.Tensor(grad, requires_grad=False)
            loss = flow.sum(x * grad_tensor)
            loss.backward()
            adam.step()
            adam.zero_grad()

        for i in range(train_iters):
            train_one_iter(random_grad_seq[i])
        return x
Example 12: a module constructor that registers a weight Parameter built from an externally defined para array.
    def __init__(self):
        super().__init__()
        self.w = Parameter(flow.Tensor(para))
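Finally, a minimal sketch of how a module like this could feed its registered parameter to one of the optimizers above. The MyModel name, the para array, the learning rate and the import lines are assumptions for illustration and may differ from the OneFlow snapshot these examples were taken from:

import numpy as np
import oneflow as flow
from oneflow.nn import Parameter  # assumed import path

para = np.ones((2, 3), dtype=np.float32)  # hypothetical initial weight

class MyModel(flow.nn.Module):
    def __init__(self):
        super().__init__()
        # Assigning a Parameter in __init__ registers it on the module,
        # so it shows up in model.parameters() and is seen by _apply.
        self.w = Parameter(flow.Tensor(para))

model = MyModel()
sgd = flow.optim.SGD([{"params": list(model.parameters())}], lr=0.1)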