def optimize(loss, params, trainable_variables=None):
    """Minimizes the loss.

  Args:
    loss: The loss to minimize.
    params: A dictionary of hyperparameters.
    trainable_variables: A list of variables to optimize, or ``None`` to use all
      trainable variables.

  Returns:
    The loss minimization op.
  """
    global_step = tf.train.get_or_create_global_step()
    decay_type = params.get("decay_type")

    if decay_type is not None:
        decay_fn = learning_rate_decay_fn(
            decay_type,
            params["decay_rate"],
            params["decay_steps"],
            decay_step_duration=params.get("decay_step_duration", 1),
            staircase=params.get("staircase", True),
            start_decay_steps=params.get("start_decay_steps", 0),
            minimum_learning_rate=params.get("minimum_learning_rate", 0))
    else:
        decay_fn = None

    learning_rate = float(params["learning_rate"])
    print("learning_rate: %s" % learning_rate)
    clip_gradients = params.get("clip_gradients")
    if clip_gradients is not None:
        clip_gradients = float(clip_gradients)
        print("clip_gradients: %s" % clip_gradients)

    optimizer_class = get_optimizer_class(params["optimizer"])
    optimizer_params = params.get("optimizer_params", {})

    if optimizer_class.__name__ == "AdafactorOptimizer":
        optimizer = optimizers.get_adafactor_optimizer_from_params(
            optimizer_class, optimizer_params)
    else:
        optimizer = lambda lr: optimizer_class(lr, **optimizer_params)

    regularization = params.get("regularization")
    if regularization is not None:
        loss += regularization_penalty(regularization["type"],
                                       regularization["scale"])

    return tf.contrib.layers.optimize_loss(loss,
                                           global_step,
                                           learning_rate,
                                           optimizer,
                                           clip_gradients=clip_gradients,
                                           learning_rate_decay_fn=decay_fn,
                                           variables=trainable_variables,
                                           name="optim",
                                           summaries=[
                                               "learning_rate",
                                               "global_gradient_norm",
                                           ],
                                           colocate_gradients_with_ops=True)
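For context, a hedged usage sketch showing the ``params`` keys this function reads; the optimizer name, decay type, and values are illustrative assumptions, not prescribed defaults.

# Illustrative sketch only: these are the keys optimize() reads from params.
# The decay-related keys are needed only when "decay_type" is set.
params = {
    "learning_rate": 1.0,
    "optimizer": "AdamOptimizer",
    "optimizer_params": {"beta1": 0.9, "beta2": 0.998},
    "decay_type": "exponential_decay",
    "decay_rate": 0.7,
    "decay_steps": 10000,
    "clip_gradients": 5.0,
    "regularization": {"type": "l2", "scale": 1e-4},
}
train_op = optimize(loss, params)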
Example #2
def optimize(loss, params, mixed_precision=False):
  """Minimizes the loss.

  Args:
    loss: The loss to minimize.
    params: A dictionary of hyperparameters.
    mixed_precision: If ``True``, wraps the optimizer to maintain a float32 copy
      of the weights.

  Returns:
    The loss minimization op.
  """
  regularization = params.get("regularization")
  if regularization is not None:
    loss += regularization_penalty(regularization["type"], regularization["scale"])

  global_step = tf.train.get_or_create_global_step()
  with tf.variable_scope("optim"):
    # Learning rate.
    learning_rate = tf.get_variable(
        "learning_rate",
        [],
        trainable=False,
        initializer=tf.constant_initializer(float(params["learning_rate"])))
    if "decay_type" in params:
      decay_fn = learning_rate_decay_fn(
          params["decay_type"],
          params["decay_rate"],
          params["decay_steps"],
          decay_step_duration=params.get("decay_step_duration", 1),
          staircase=params.get("staircase", True),
          start_decay_steps=params.get("start_decay_steps", 0),
          minimum_learning_rate=params.get("minimum_learning_rate", 0))
      learning_rate = decay_fn(learning_rate, global_step)
    tf.summary.scalar("learning_rate", learning_rate)

    # Optimizer.
    optimizer_class = get_optimizer_class(params["optimizer"])
    optimizer_params = params.get("optimizer_params", {})
    if optimizer_class.__name__ == "AdafactorOptimizer":
      optimizer = optimizers.get_adafactor_optimizer_from_params(
          optimizer_class, optimizer_params, learning_rate=learning_rate)
    else:
      optimizer = optimizer_class(learning_rate, **optimizer_params)
    if mixed_precision:
      optimizer = optimizers.MixedPrecisionOptimizerWrapper(
          optimizer, loss_scale=get_loss_scale_from_params(params))

    # Gradients.
    gradients = optimizer.compute_gradients(loss, colocate_gradients_with_ops=True)
    _summarize_gradients_norm("global_norm/gradient_norm", gradients)
    if "clip_gradients" in params:
      gradients = _clip_gradients_by_norm(gradients, float(params["clip_gradients"]))
      _summarize_gradients_norm("global_norm/clipped_gradient_norm", gradients)

    return optimizer.apply_gradients(gradients, global_step=global_step)
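The ``_summarize_gradients_norm`` and ``_clip_gradients_by_norm`` helpers are not shown in this listing; below is a plausible sketch, assuming they take the ``(gradient, variable)`` pairs returned by ``compute_gradients`` and clip by global norm.

# Hypothetical sketch of the helpers used above; the actual implementations
# are not part of this listing.
def _summarize_gradients_norm(name, gradients):
  # gradients is a list of (gradient, variable) pairs; skip unconnected variables.
  tf.summary.scalar(name, tf.global_norm([g for g, _ in gradients if g is not None]))

def _clip_gradients_by_norm(gradients, clip_norm):
  grads, variables = zip(*gradients)
  clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm)
  return list(zip(clipped_grads, variables))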
Example #3
def optimize(loss, params, mixed_precision=False):
    """Minimizes the loss.

  Args:
    loss: The loss to minimize.
    params: A dictionary of hyperparameters.
    mixed_precision: If ``True``, wraps the optimizer to maintain a float32 copy
      of the weights.

  Returns:
    The loss minimization op.
  """
    global_step = tf.train.get_or_create_global_step()
    decay_type = params.get("decay_type")

    if decay_type is not None:
        decay_fn = learning_rate_decay_fn(
            decay_type,
            params["decay_rate"],
            params["decay_steps"],
            decay_step_duration=params.get("decay_step_duration", 1),
            staircase=params.get("staircase", True),
            start_decay_steps=params.get("start_decay_steps", 0),
            minimum_learning_rate=params.get("minimum_learning_rate", 0))
    else:
        decay_fn = None

    learning_rate = float(params["learning_rate"])
    clip_gradients = params.get("clip_gradients")
    if clip_gradients is not None:
        clip_gradients = float(clip_gradients)

    optimizer_class = get_optimizer_class(params["optimizer"])
    optimizer_params = params.get("optimizer_params", {})

    if optimizer_class.__name__ == "AdafactorOptimizer":
        optimizer = optimizers.get_adafactor_optimizer_from_params(
            optimizer_class, optimizer_params)
    else:
        optimizer = lambda lr: optimizer_class(lr, **optimizer_params)

    if mixed_precision:
        optimizer_fn = lambda lr: optimizers.MixedPrecisionOptimizerWrapper(
            optimizer(lr), loss_scale=get_loss_scale_from_params(params))
    else:
        optimizer_fn = optimizer

    regularization = params.get("regularization")
    if regularization is not None:
        loss += regularization_penalty(regularization["type"],
                                       regularization["scale"])

    return tf.contrib.layers.optimize_loss(loss,
                                           global_step,
                                           learning_rate,
                                           optimizer_fn,
                                           clip_gradients=clip_gradients,
                                           learning_rate_decay_fn=decay_fn,
                                           name="optim",
                                           summaries=[
                                               "learning_rate",
                                               "global_gradient_norm",
                                           ],
                                           colocate_gradients_with_ops=True)
Example #4
def optimize_loss(loss,
                  params,
                  mixed_precision=False,
                  var_list=None,
                  hvd=None):
    """Minimizes the loss.

  Args:
    loss: The loss to minimize.
    params: A dictionary of hyperparameters.
    mixed_precision: If ``True``, wraps the optimizer to maintain a float32 copy
      of the weights.
    var_list: The variables to update.
    hvd: Optional Horovod object.

  Returns:
    The loss minimization op and a list of internal variables to initialize.
  """
    regularization = params.get("regularization")
    if regularization is not None:
        loss += regularization_penalty(regularization["type"],
                                       regularization["scale"],
                                       weights_list=var_list)

    global_step = tf.train.get_or_create_global_step()
    with tf.variable_scope("optim"):
        learning_rate = tf.constant(params["learning_rate"], dtype=tf.float32)
        if params.get("decay_type") is not None:
            decay_params = params.get("decay_params", {})
            if "decay_rate" in params:
                # Backward compatibility: fill params from previous options.
                decay_params["decay_rate"] = params["decay_rate"]
                decay_params["decay_steps"] = params["decay_steps"]
                decay_params["staircase"] = params.get("staircase", True)
            decay_fn = learning_rate_decay_fn_v2(
                params["decay_type"],
                decay_params=decay_params,
                decay_step_duration=params.get("decay_step_duration", 1),
                start_decay_step=params.get("start_decay_steps", 0),
                minimum_learning_rate=params.get("minimum_learning_rate", 0))
            learning_rate = decay_fn(learning_rate, global_step)
        tf.summary.scalar("learning_rate", learning_rate)

        # Optimizer.
        optimizer_class = get_optimizer_class(params["optimizer"])
        optimizer_params = params.get("optimizer_params", {})
        if optimizer_class.__name__ == "AdafactorOptimizer":
            optimizer = optimizers.get_adafactor_optimizer_from_params(
                optimizer_class, optimizer_params, learning_rate=learning_rate)
        else:
            weight_decay = params.get("weight_decay")
            if weight_decay is not None:
                optimizer_class = tf.contrib.opt.extend_with_decoupled_weight_decay(
                    optimizer_class)
                optimizer = optimizer_class(weight_decay,
                                            learning_rate=learning_rate,
                                            **optimizer_params)
            else:
                optimizer = optimizer_class(learning_rate, **optimizer_params)
        if mixed_precision:
            from opennmt.optimizers.mixed_precision_wrapper import get_loss_scale_from_params
            optimizer = optimizers.MixedPrecisionOptimizerWrapper(
                optimizer, loss_scale=get_loss_scale_from_params(params))
        if hvd is not None:
            from opennmt.optimizers.distributed_optimizer import DistributedOptimizer
            optimizer = DistributedOptimizer.from_params(
                optimizer, params=params.get("horovod"))

        # Gradients.
        var_list = _get_trainable_variables(
            var_list=var_list, freeze_variables=params.get("freeze_variables"))
        gradients = optimizer.compute_gradients(
            loss, var_list=var_list, colocate_gradients_with_ops=True)
        _summarize_gradients_norm("global_norm/gradient_norm", gradients)
        if params.get("clip_gradients") is not None:
            gradients = _clip_gradients_by_norm(
                gradients, float(params["clip_gradients"]))
            _summarize_gradients_norm("global_norm/clipped_gradient_norm",
                                      gradients)

        return delayed_update(optimizer,
                              gradients,
                              global_step,
                              accum_count=params.get("gradients_accum", 1))
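``_get_trainable_variables`` is likewise not shown; a minimal sketch of one possible implementation, assuming ``freeze_variables`` holds regular expressions that are matched against variable names to exclude them from the update.

import re

# Hypothetical sketch; the actual helper may select variables differently.
def _get_trainable_variables(var_list=None, freeze_variables=None):
    if var_list is None:
        var_list = tf.trainable_variables()
    if not freeze_variables:
        return var_list
    if not isinstance(freeze_variables, list):
        freeze_variables = [freeze_variables]
    patterns = [re.compile(regex) for regex in freeze_variables]
    # Keep only the variables whose name matches none of the freeze patterns.
    return [
        variable for variable in var_list
        if not any(pattern.search(variable.name) for pattern in patterns)]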
Example #5
def optimize_loss(loss, params, mixed_precision=False, var_list=None):
  """Minimizes the loss.

  Args:
    loss: The loss to minimize.
    params: A dictionary of hyperparameters.
    mixed_precision: If ``True``, wraps the optimizer to maintain a float32 copy
      of the weights.
    var_list: The variables to update. # TODO: var_list is not passed in from
      model.py; is it assumed to come from the current scope?

  Returns:
    The loss minimization op and a list of internal variables to initialize.
  """
  regularization = params.get("regularization")
  if regularization is not None:
    loss += regularization_penalty(
        regularization["type"], regularization["scale"], weights_list=var_list) #TODO

  global_step = tf.train.get_or_create_global_step()
  with tf.variable_scope("optim"):
    # Learning rate.
    learning_rate = tf.get_variable(
        "learning_rate",
        [],
        trainable=False,
        initializer=tf.constant_initializer(float(params["learning_rate"])))
    if params.get("decay_type") is not None:
      decay_params = params.get("decay_params", {})
      if "decay_rate" in params:
        # Backward compatibility: fill params from previous options.
        decay_params["decay_rate"] = params["decay_rate"]
        decay_params["decay_steps"] = params["decay_steps"]
        decay_params["staircase"] = params.get("staircase", True)
      decay_fn = learning_rate_decay_fn_v2(
          params["decay_type"],
          decay_params=decay_params,
          decay_step_duration=params.get("decay_step_duration", 1),
          start_decay_step=params.get("start_decay_steps", 0),
          minimum_learning_rate=params.get("minimum_learning_rate", 0))
      learning_rate = decay_fn(learning_rate, global_step)
    tf.summary.scalar("learning_rate", learning_rate)

    # Optimizer.
    optimizer_class = get_optimizer_class(params["optimizer"])
    optimizer_params = params.get("optimizer_params", {})
    if optimizer_class.__name__ == "AdafactorOptimizer":
      optimizer = optimizers.get_adafactor_optimizer_from_params(
          optimizer_class, optimizer_params, learning_rate=learning_rate)
    else:
      optimizer = optimizer_class(learning_rate, **optimizer_params)
    if mixed_precision:
      optimizer = optimizers.MixedPrecisionOptimizerWrapper(
          optimizer, loss_scale=get_loss_scale_from_params(params))

    # Gradients.
    gradients = optimizer.compute_gradients(
        loss, var_list=var_list, colocate_gradients_with_ops=True)  # TODO: which var_list?

    """
    # Gradients. NEW with freeze_update.
    freeze_params = params.get("freeze")
    if False:  # True: # freeze_params is not None:
        tf.logging.info("Optimizing selected network components: %s", freeze_params)
        # TODO: get the list of variables to update before backprop, e.g. by
        # creating a scope and fetching its variables as done for the learning
        # rate, or by passing in a separate variable list.
        # var_list = tf.trainable_variables()
        # freeze_update_op only selects the subset of trainable variables to update.
        variable_list = freeze_update_op(freeze_params)
        tf.logging.info("Parameters being optimized: %s", variable_list)
        gradients = optimizer.compute_gradients(
            loss, var_list=variable_list, colocate_gradients_with_ops=True)
    else:
        tf.logging.info("Optimizing all network components.")
        gradients = optimizer.compute_gradients(loss, colocate_gradients_with_ops=True)
    """

    _summarize_gradients_norm("global_norm/gradient_norm", gradients)
    if params.get("clip_gradients") is not None:
      gradients = _clip_gradients_by_norm(gradients, float(params["clip_gradients"]))
      _summarize_gradients_norm("global_norm/clipped_gradient_norm", gradients)

    return delayed_update(
        optimizer,
        gradients,
        global_step,
        accum_count=params.get("gradients_accum", 1))
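For reference, a hedged sketch of the two decay configurations this version accepts; the decay type and values are illustrative only.

# Newer format: decay options nested under "decay_params" (illustrative values).
params_new = {
    "learning_rate": 2.0,
    "optimizer": "AdamOptimizer",
    "decay_type": "exponential_decay",
    "decay_params": {"decay_rate": 0.7, "decay_steps": 10000, "staircase": True},
    "gradients_accum": 4,  # apply accumulated gradients every 4 steps
}

# Legacy format: flat keys that are copied into decay_params for backward compatibility.
params_legacy = {
    "learning_rate": 2.0,
    "optimizer": "AdamOptimizer",
    "decay_type": "exponential_decay",
    "decay_rate": 0.7,
    "decay_steps": 10000,
    "staircase": True,
}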