Code example #1
    def apply_gradients(self, grads_and_vars):
        self._iterations += 1
        grads, var_list = list(zip(*grads_and_vars))
        new_grads = []

        if self._summaries:
            summary.scalar("optimizer/scale", self._scale,
                           utils.get_global_step())

        for grad in grads:
            if grad is None:
                new_grads.append(None)
                continue

            norm = grad.data.norm()

            if not torch.isfinite(norm):
                self._update_if_not_finite_grads()
                return
            else:
                # Rescale gradients
                new_grads.append(grad.data.float().mul_(1.0 / self._scale))

        self._update_if_finite_grads()
        self._optimizer.apply_gradients(zip(new_grads, var_list))
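
The method above is part of a loss-scaling optimizer wrapper: the loss is multiplied by self._scale before the backward pass, so each gradient must be divided by the same factor before the wrapped optimizer applies it, and the whole step is skipped when a non-finite norm signals overflow. Below is a minimal standalone sketch of that check; the function name and call pattern are illustrative assumptions, not part of the original code.

import torch

def unscale_or_skip(grads, scale):
    # Un-scale each gradient by 1/scale; return None to tell the caller to
    # skip the update (and typically lower the scale) when any gradient norm
    # is non-finite, mirroring the overflow check in apply_gradients above.
    new_grads = []
    for grad in grads:
        if grad is None:
            new_grads.append(None)
            continue
        if not torch.isfinite(grad.norm()):
            return None
        new_grads.append(grad.float() * (1.0 / scale))
    return new_grads
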
Code example #2
def _save_summary(grads_and_vars):
    total_norm = 0.0

    for grad, var in grads_and_vars:
        if grad is None:
            continue

        _, var = var
        grad_norm = grad.data.norm()
        total_norm += grad_norm**2
        summary.histogram(var.tensor_name, var, utils.get_global_step())
        summary.scalar("norm/" + var.tensor_name, var.norm(),
                       utils.get_global_step())
        summary.scalar("grad_norm/" + var.tensor_name, grad_norm,
                       utils.get_global_step())

    total_norm = total_norm**0.5
    summary.scalar("grad_norm", total_norm, utils.get_global_step())

    return float(total_norm)
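
_save_summary accumulates the squared 2-norm of every gradient and takes the square root at the end, so the value written to the "grad_norm" scalar is the global gradient norm sqrt(sum_i ||g_i||^2). A sketch of the same arithmetic without the summary calls; the helper name is an assumption.

import torch

def global_grad_norm(parameters):
    # Same quantity as the "grad_norm" summary above: the 2-norm over the
    # concatenation of all gradients, computed as sqrt(sum of squared norms).
    total = 0.0
    for p in parameters:
        if p.grad is not None:
            total += float(p.grad.data.norm()) ** 2
    return total ** 0.5
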
Code example #3
File: schedules.py  Project: jwy-sss/NLP-21
    def __call__(self, step):
        if step <= self._warmup_steps:
            lr_step = self._maximum_learning_rate - self._initial_learning_rate
            lr_step /= self._warmup_steps
            lr = self._initial_learning_rate + lr_step * step
        else:
            lr = self._maximum_learning_rate

            if self._warmup_steps != 0:
                # approximately hidden_size ** -0.5
                lr = lr * self._warmup_steps**0.5

            lr = lr * (step**-0.5)

        if self._summary:
            summary.scalar("learning_rate", lr, utils.get_global_step())

        return lr
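
This schedule warms the learning rate up linearly from _initial_learning_rate to _maximum_learning_rate over _warmup_steps, then decays it proportionally to step ** -0.5; the warmup_steps ** 0.5 factor makes the two branches meet at step == warmup_steps. A standalone restatement for quick experimentation; all names are assumptions.

def warmup_rsqrt_lr(step, warmup_steps, init_lr, max_lr):
    # Linear warmup followed by inverse-square-root decay, continuous at
    # step == warmup_steps where both branches evaluate to max_lr.
    if step <= warmup_steps:
        return init_lr + (max_lr - init_lr) / warmup_steps * step
    lr = max_lr
    if warmup_steps != 0:
        lr *= warmup_steps ** 0.5
    return lr * step ** -0.5

# e.g. with warmup_steps=4000, init_lr=0.0, max_lr=7e-4:
# step 4000 -> 7e-4, step 16000 -> 3.5e-4 (sqrt(4000 / 16000) = 0.5)
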
Code example #4
File: schedules.py  Project: jwy-sss/NLP-21
    def __call__(self, step):
        # See reference: The Best of Both Worlds: Combining Recent Advances
        # in Neural Machine Translation
        n = self._n
        p = self._warmup_steps / n
        s = n * self._start_decay_step
        e = n * self._end_decay_step

        learning_rate = self._learning_rate

        learning_rate *= min(
            1.0 + (n - 1) * step / float(n * p), n,
            n * ((2 * n)**(float(s - n * step) / float(e - s))))

        if self._summary:
            summary.scalar("learning_rate", learning_rate,
                           utils.get_global_step())

        return learning_rate
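
The min(...) selects between a linear warmup term, a constant plateau at n, and an exponential decay term, following the schedule described in the paper cited in the comment ("The Best of Both Worlds", Chen et al., 2018). The same formula restated as a standalone function; the parameter names are assumptions.

def bob_lr(step, base_lr, n, warmup_steps, start_decay_step, end_decay_step):
    # Same computation as __call__ above:
    #   lr = base_lr * min(1 + step*(n-1)/(n*p), n, n*(2n)**((s - n*step)/(e - s)))
    # with p = warmup_steps / n, s = n * start_decay_step, e = n * end_decay_step.
    p = warmup_steps / n
    s = n * start_decay_step
    e = n * end_decay_step
    return base_lr * min(
        1.0 + (n - 1) * step / float(n * p),
        n,
        n * ((2 * n) ** (float(s - n * step) / float(e - s))))
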
Code example #5
File: schedules.py  Project: jwy-sss/NLP-21
    def __call__(self, step):
        boundaries = self._boundaries
        values = self._values
        learning_rate = values[0]

        if step <= boundaries[0]:
            learning_rate = values[0]
        elif step > boundaries[-1]:
            learning_rate = values[-1]
        else:
            for low, high, v in zip(boundaries[:-1], boundaries[1:],
                                    values[1:-1]):

                if step > low and step <= high:
                    learning_rate = v
                    break

        if self._summary:
            summary.scalar("learning_rate", learning_rate,
                           utils.get_global_step())

        return learning_rate
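
This is a piecewise-constant schedule: values holds one more entry than boundaries, values[0] is used up to and including boundaries[0], values[i] on the interval (boundaries[i-1], boundaries[i]], and values[-1] after the last boundary. An equivalent standalone lookup; the function name is an assumption.

def piecewise_constant(step, boundaries, values):
    # Equivalent lookup, assuming len(values) == len(boundaries) + 1 and
    # boundaries sorted in increasing order.
    for boundary, value in zip(boundaries, values):
        if step <= boundary:
            return value
    return values[-1]

# e.g. boundaries = [1000, 2000], values = [1e-3, 5e-4, 1e-4]:
# step 500 -> 1e-3, step 1500 -> 5e-4, step 3000 -> 1e-4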