Code example #1
  def test_load_and_remap_linear_multiclass_initializer_default_init(self):
    """Tests where the zeros_initializer default is used for linear."""
    loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer(
        new_row_vocab_size=5,
        new_col_vocab_file=self.new_class_vocab_file,
        old_col_vocab_file=self.old_class_vocab_file,
        new_col_vocab_size=4,
        old_tensor_name='some_scope/embeddings',
        ckpt_path=[self.checkpoint_file],
        new_row_vocab_file=self.new_feature_vocab_file,
        old_row_vocab_file=self.old_feature_vocab_file,
        num_row_oov_buckets=1,
        num_col_oov_buckets=1))

    expected_remapped_matrix = np.concatenate(
        [
            np.reshape([2, 18, 34, 50, 0, 0], [6, 1]),
            np.reshape([0, 16, 32, 48, 0, 0], [6, 1]),
            np.reshape([0] * 6, [6, 1]),
            np.reshape([1, 17, 33, 49, 0, 0], [6, 1]),
            np.reshape([0] * 6, [6, 1])
        ],
        axis=1)

    remapped_matrix = variable_scope.get_variable(
        name='linear_init_fallback/obtained_weight_matrix',
        shape=[6, 5],
        initializer=loading_initializer,
        partitioner=partitioned_variables.fixed_size_partitioner(2))

    with self.test_session():
      variables.global_variables_initializer().run()
      self.assertAllClose(expected_remapped_matrix,
                          remapped_matrix.as_tensor().eval())
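
For orientation, the row/column remapping these tests exercise can be simulated in plain NumPy. The sketch below is illustrative only: the toy old matrix and the new-to-old index maps are assumptions, not the fixture values built in the test's setUp. Entries whose row and column both exist in the old vocab are copied over; everything else (unseen ids and OOV buckets) falls back to the initializer, which for the linear default is zeros.

import numpy as np

def simulate_remap(old_matrix, row_map, col_map, num_row_oov, num_col_oov,
                   fill_value=0.0):
  # Toy stand-in for the remapping done by
  # _load_and_remap_matrix_initializer: row_map/col_map give, for each new
  # vocab index, the matching old index, or -1 if absent from the old vocab.
  num_rows = len(row_map) + num_row_oov
  num_cols = len(col_map) + num_col_oov
  out = np.full((num_rows, num_cols), fill_value, dtype=old_matrix.dtype)
  for i, old_i in enumerate(row_map):
    for j, old_j in enumerate(col_map):
      if old_i >= 0 and old_j >= 0:
        out[i, j] = old_matrix[old_i, old_j]
  return out

# A 4x3 old matrix; one new row and one new column are unseen (-1), plus one
# zero-filled OOV bucket per axis, mirroring the defaults in the test above.
old = np.arange(12, dtype=np.float32).reshape(4, 3)
print(simulate_remap(old, row_map=[1, 0, -1], col_map=[2, -1, 0],
                     num_row_oov=1, num_col_oov=1))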
Code example #2
  def test_load_and_remap_output_layer_weight_initializer_dnn_output(self):
    """Tests for the output layer initializer in the DNN output case."""
    loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer(
        new_row_vocab_size=5,
        new_col_vocab_file=self.new_class_vocab_file,
        old_col_vocab_file=self.old_class_vocab_file,
        new_col_vocab_size=4,
        old_tensor_name='some_scope/embeddings',
        ckpt_path=[self.checkpoint_file],
        num_col_oov_buckets=1,
        initializer=self.initializer))

    expected_remapped_matrix = np.concatenate(
        [
            np.reshape([2, 18, 34, 50, 66], [5, 1]),
            np.reshape([0, 16, 32, 48, 64], [5, 1]),
            np.reshape([self.init_val] * 5, [5, 1]),
            np.reshape([1, 17, 33, 49, 65], [5, 1]),
            np.reshape([self.init_val] * 5, [5, 1])
        ],
        axis=1)

    # The new weight matrix is of size
    # [5-sized input layer, 4 class vocab + 1 class OOV].
    remapped_matrix = variable_scope.get_variable(
        name='dnn_output/obtained_weight_matrix',
        shape=[5, 5],
        initializer=loading_initializer,
        partitioner=partitioned_variables.fixed_size_partitioner(2))

    with self.test_session():
      variables.global_variables_initializer().run()
      self.assertAllClose(expected_remapped_matrix,
                          remapped_matrix.as_tensor().eval())
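
Unlike the linear tests, this case passes no row vocab files, so every input row is loaded verbatim; only the unseen class column and the OOV column fall back to `self.initializer`. The fixture's initializer is not shown in this snippet, so the following stand-in is an assumption: a constant-fill callable with the (shape, dtype, partition_info) signature that `get_variable` passes to initializers, where `init_val` is a hypothetical placeholder for `self.init_val`.

import tensorflow as tf  # TF1-style API, matching these tests

init_val = 42.0  # hypothetical stand-in for self.init_val

def constant_fill_initializer(shape, dtype=tf.float32, partition_info=None):
  # Fills unseen/OOV entries with a fixed value.
  del partition_info  # unused in this sketch
  return tf.fill(shape, tf.constant(init_val, dtype=dtype))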
Code example #3
    def test_load_and_remap_linear_multiclass_initializer_default_init(self):
        """Tests where the zeros_initializer default is used for linear."""
        loading_initializer = (
            checkpoint_ops._load_and_remap_matrix_initializer(
                new_row_vocab_size=5,
                new_col_vocab_file=self.new_class_vocab_file,
                old_col_vocab_file=self.old_class_vocab_file,
                new_col_vocab_size=4,
                old_tensor_name='some_scope/embeddings',
                ckpt_path=[self.checkpoint_file],
                new_row_vocab_file=self.new_feature_vocab_file,
                old_row_vocab_file=self.old_feature_vocab_file,
                num_row_oov_buckets=1,
                num_col_oov_buckets=1))

        # Same as test_initializer_with_oov_only_partition, but with zero
        # initialization.
        expected_remapped_matrix = np.concatenate([
            np.reshape([2, 18, 34, 50, 0, 0], [6, 1]),
            np.reshape([0, 16, 32, 48, 0, 0], [6, 1]),
            np.reshape([0] * 6, [6, 1]),
            np.reshape([1, 17, 33, 49, 0, 0], [6, 1]),
            np.reshape([0] * 6, [6, 1])
        ],
                                                  axis=1)
        remapped_matrix = variable_scope.get_variable(
            name='linear_init_fallback/obtained_weight_matrix',
            shape=[6, 5],
            initializer=loading_initializer,
            partitioner=partitioned_variables.fixed_size_partitioner(2))

        with self.cached_session():
            variables.global_variables_initializer().run()
            self.assertAllClose(expected_remapped_matrix,
                                remapped_matrix.as_tensor().eval())
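
All of these tests wrap the variable in `fixed_size_partitioner(2)`, which splits it into shards along axis 0; the loading initializer then runs once per shard with a `partition_info` carrying that shard's offset. The following pure-Python sketch of the shard shapes reflects my understanding of the splitting rule (any remainder goes to the earliest shards) and is not the TF implementation itself.

def fixed_size_shards(full_shape, num_shards, axis=0):
  # e.g. a [6, 5] variable with 2 shards ->
  # ([3, 5], offset [0, 0]) and ([3, 5], offset [3, 0])
  base, rem = divmod(full_shape[axis], num_shards)
  offset = 0
  for s in range(num_shards):
    extent = base + (1 if s < rem else 0)
    shape = list(full_shape)
    shape[axis] = extent
    var_offset = [0] * len(full_shape)
    var_offset[axis] = offset
    yield shape, var_offset
    offset += extent

print(list(fixed_size_shards([6, 5], 2)))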
Code example #4
    def test_load_and_remap_output_layer_weight_initializer_dnn_output(self):
        """Tests for the output layer initializer in the DNN output case."""
        loading_initializer = (
            checkpoint_ops._load_and_remap_matrix_initializer(
                new_row_vocab_size=5,
                new_col_vocab_file=self.new_class_vocab_file,
                old_col_vocab_file=self.old_class_vocab_file,
                new_col_vocab_size=4,
                old_tensor_name='some_scope/embeddings',
                ckpt_path=[self.checkpoint_file],
                num_col_oov_buckets=1,
                initializer=self.initializer))

        # The new weight matrix is of size
        # [5-sized input layer, 4 class vocab + 1 class OOV].
        expected_remapped_matrix = np.concatenate([
            np.reshape([2, 18, 34, 50, 66], [5, 1]),
            np.reshape([0, 16, 32, 48, 64], [5, 1]),
            np.reshape([self.init_val] * 5, [5, 1]),
            np.reshape([1, 17, 33, 49, 65], [5, 1]),
            np.reshape([self.init_val] * 5, [5, 1])
        ],
                                                  axis=1)
        remapped_matrix = variable_scope.get_variable(
            name='dnn_output/obtained_weight_matrix',
            shape=[5, 5],
            initializer=loading_initializer,
            partitioner=partitioned_variables.fixed_size_partitioner(2))

        with self.cached_session():
            variables.global_variables_initializer().run()
            self.assertAllClose(expected_remapped_matrix,
                                remapped_matrix.as_tensor().eval())
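
Read column-wise, the expected matrix has checkpoint-loaded values in columns 0, 1 and 3, while column 2 (a class with no counterpart in the old vocab, judging by the expected values) and column 4 (the OOV bucket) hold `self.init_val`. The same array can be built a bit more directly with `np.stack`; `init_val` below is a hypothetical placeholder.

import numpy as np

init_val = 42.0  # hypothetical stand-in for self.init_val
expected = np.stack([
    [2, 18, 34, 50, 66],
    [0, 16, 32, 48, 64],
    [init_val] * 5,
    [1, 17, 33, 49, 65],
    [init_val] * 5,
], axis=1)  # each list becomes one column of the [5, 5] matrix
assert expected.shape == (5, 5)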
Code example #5
  def test_initializer_with_oov_only_partition(self):
    """Tests for the output layer initializer where one partition is all OOV."""
    loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer(
        new_row_vocab_size=5,
        new_col_vocab_file=self.new_class_vocab_file,
        old_col_vocab_file=self.old_class_vocab_file,
        new_col_vocab_size=4,
        old_tensor_name='some_scope/embeddings',
        ckpt_path=[self.checkpoint_file],
        new_row_vocab_file=self.new_feature_vocab_file,
        old_row_vocab_file=self.old_feature_vocab_file,
        num_row_oov_buckets=5,
        num_col_oov_buckets=1,
        initializer=self.initializer))

    expected_remapped_matrix = np.concatenate(
        [
            np.reshape([2, 18, 34, 50] + [self.init_val] * 6, [10, 1]),
            np.reshape([0, 16, 32, 48] + [self.init_val] * 6, [10, 1]),
            np.reshape([self.init_val] * 10, [10, 1]),
            np.reshape([1, 17, 33, 49] + [self.init_val] * 6, [10, 1]),
            np.reshape([self.init_val] * 10, [10, 1]),
        ],
        axis=1)

    # The new weight matrix is of size
    # [5 feature vocab + 5 feature OOV, 4 class vocab + 1 class OOV].  The
    # second partition has only OOV.
    remapped_matrix = variable_scope.get_variable(
        name='linear_all_oov/obtained_weight_matrix',
        shape=[10, 5],
        initializer=loading_initializer,
        partitioner=partitioned_variables.fixed_size_partitioner(2))

    with self.test_session():
      variables.global_variables_initializer().run()
      self.assertAllClose(expected_remapped_matrix,
                          remapped_matrix.as_tensor().eval())
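
The shape arithmetic behind the "all OOV" partition: the variable has 5 vocab rows plus 5 OOV rows, and splitting the 10 rows into two fixed-size shards yields row ranges 0-4 and 5-9, so the second shard lies entirely in OOV territory and is filled purely by the initializer. A quick sanity check of that claim:

# Rows 0-4 come from the new feature vocab; rows 5-9 are OOV buckets.
new_vocab_rows = 5
shard_row_ranges = [(0, 5), (5, 10)]  # from fixed_size_partitioner(2)
assert shard_row_ranges[1][0] >= new_vocab_rows  # second shard: OOV only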
Code example #6
  def test_load_and_remap_output_layer_weight_initializer_linear(self):
    """Tests for the output layer initializer in the linear multi-class case."""
    loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer(
        new_row_vocab_size=5,
        new_col_vocab_file=self.new_class_vocab_file,
        old_col_vocab_file=self.old_class_vocab_file,
        new_col_vocab_size=4,
        old_tensor_name='some_scope/embeddings',
        ckpt_path=[self.checkpoint_file],
        new_row_vocab_file=self.new_feature_vocab_file,
        old_row_vocab_file=self.old_feature_vocab_file,
        num_row_oov_buckets=1,
        num_col_oov_buckets=1,
        initializer=self.initializer))

    expected_remapped_matrix = np.concatenate(
        [
            np.reshape([2, 18, 34, 50, self.init_val, self.init_val], [6, 1]),
            np.reshape([0, 16, 32, 48, self.init_val, self.init_val], [6, 1]),
            np.reshape([self.init_val] * 6, [6, 1]),
            np.reshape([1, 17, 33, 49, self.init_val, self.init_val], [6, 1]),
            np.reshape([self.init_val] * 6, [6, 1])
        ],
        axis=1)

    # The new weight matrix is of size
    # [5 feature vocab + 1 feature OOV, 4 class vocab + 1 class OOV].  Use a
    # partitioned variable to confirm that the offset logic works.
    remapped_matrix = variable_scope.get_variable(
        name='linear/obtained_weight_matrix',
        shape=[6, 5],
        initializer=loading_initializer,
        partitioner=partitioned_variables.fixed_size_partitioner(2))

    with self.test_session():
      variables.global_variables_initializer().run()
      self.assertAllClose(expected_remapped_matrix,
                          remapped_matrix.as_tensor().eval())
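
Row-wise, the expected matrix tells the same story as the columns: rows 0-3 are remapped from the checkpoint, row 4 is a feature with no old-vocab counterpart (its entries are all `self.init_val`), and row 5 is the OOV bucket. With two shards this splits as rows 0-2 versus rows 3-5, which is what exercises the offset logic the comment mentions; a sketch of the shard-local to global row translation (names illustrative):

def shard_global_rows(shard_shape, var_offset):
  # Maps a shard's local rows to global rows of the [6, 5] matrix.
  start = var_offset[0]
  return list(range(start, start + shard_shape[0]))

print(shard_global_rows([3, 5], [0, 0]))  # shard 0 -> rows [0, 1, 2]
print(shard_global_rows([3, 5], [3, 0]))  # shard 1 -> rows [3, 4, 5]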
Code example #7
def _warm_start_var_with_vocab(var,
                               current_vocab_path,
                               current_vocab_size,
                               prev_ckpt,
                               prev_vocab_path,
                               previous_vocab_size=-1,
                               current_oov_buckets=0,
                               prev_tensor_name=None,
                               initializer=None,
                               axis=0):
    """Warm-starts given variable from `prev_tensor_name` tensor in `prev_ckpt`.

  Use this method when `var` is backed by a vocabulary. This method stitches
  the given `var` such that values corresponding to individual features in the
  vocabulary remain consistent even when the order of the features changes
  between the old and new vocabularies.

  Args:
    var: Current graph's variable that needs to be warm-started (initialized).
      Can be either of the following:
      (i) `Variable`
      (ii) `ResourceVariable`
      (iii) list of `Variable`: The list must contain slices of the same larger
        variable.
      (iv) `PartitionedVariable`
    current_vocab_path: Path to the vocab file used for the given `var`.
    current_vocab_size: An `int` specifying the number of entries in the current
      vocab.
    prev_ckpt: A string specifying the directory with checkpoint file(s) or path
      to a checkpoint. The given checkpoint must have a tensor with name
      `prev_tensor_name` (if not None) or a tensor with the same name as the
      given `var`.
    prev_vocab_path: Path to the vocab file used for the tensor in `prev_ckpt`.
    previous_vocab_size: If provided, will constrain previous vocab to the first
      `previous_vocab_size` entries.  -1 means use the entire previous vocab.
    current_oov_buckets: An `int` specifying the number of out-of-vocabulary
      buckets used for given `var`.
    prev_tensor_name: Name of the tensor to look up in the provided
      `prev_ckpt`. If None, we look up a tensor with the same name as the
      given `var`.
    initializer: Variable initializer to be used for missing entries.  If None,
      missing entries will be zero-initialized.
    axis: Axis of the variable that the provided vocabulary corresponds to.

  Raises:
    ValueError: If required args are not provided.
  """
    if not (current_vocab_path and current_vocab_size and prev_ckpt
            and prev_vocab_path):
        raise ValueError(
            "Invalid args: Must provide all of [current_vocab_path, "
            "current_vocab_size, prev_ckpt, prev_vocab_path}.")
    if checkpoint_utils._is_variable(var):
        var = [var]
    elif (isinstance(var, list)
          and all(checkpoint_utils._is_variable(v) for v in var)):
        var = var
    elif isinstance(var, variables_lib.PartitionedVariable):
        var = var._get_variable_list()
    else:
        raise TypeError(
            "var MUST be one of the following: a Variable, list of Variable or "
            "PartitionedVariable, but is {}".format(type(var)))

    if not prev_tensor_name:
        # Assume tensor name remains the same.
        prev_tensor_name = _infer_var_name(var)

    # TODO(eddz): Fix functionality for rank-1 Variables (like FC biases).
    total_v_first_axis = sum(v.get_shape().as_list()[0] for v in var)
    for v in var:
        v_shape = v.get_shape().as_list()
        slice_info = v._get_save_slice_info()
        partition_info = None
        if slice_info:
            partition_info = variable_scope._PartitionInfo(
                full_shape=slice_info.full_shape,
                var_offset=slice_info.var_offset)

        if axis == 0:
            new_row_vocab_size = current_vocab_size
            new_col_vocab_size = v_shape[1]
            old_row_vocab_size = previous_vocab_size
            old_row_vocab_file = prev_vocab_path
            new_row_vocab_file = current_vocab_path
            old_col_vocab_file = None
            new_col_vocab_file = None
            num_row_oov_buckets = current_oov_buckets
            num_col_oov_buckets = 0
        elif axis == 1:
            # Note that we must compute this value across all partitions, whereas
            # in the axis = 0 case, we can simply use v_shape[1] because we don't
            # allow partitioning across axis = 1.
            new_row_vocab_size = total_v_first_axis
            new_col_vocab_size = current_vocab_size
            old_row_vocab_size = -1
            old_row_vocab_file = None
            new_row_vocab_file = None
            old_col_vocab_file = prev_vocab_path
            new_col_vocab_file = current_vocab_path
            num_row_oov_buckets = 0
            num_col_oov_buckets = current_oov_buckets
        else:
            raise ValueError(
                "The only supported values for the axis argument are 0 "
                "and 1.  Provided axis: {}".format(axis))

        init = checkpoint_ops._load_and_remap_matrix_initializer(
            ckpt_path=checkpoint_utils._get_checkpoint_filename(prev_ckpt),
            old_tensor_name=prev_tensor_name,
            new_row_vocab_size=new_row_vocab_size,
            new_col_vocab_size=new_col_vocab_size,
            old_row_vocab_size=old_row_vocab_size,
            old_row_vocab_file=old_row_vocab_file,
            new_row_vocab_file=new_row_vocab_file,
            old_col_vocab_file=old_col_vocab_file,
            new_col_vocab_file=new_col_vocab_file,
            num_row_oov_buckets=num_row_oov_buckets,
            num_col_oov_buckets=num_col_oov_buckets,
            initializer=initializer)
        new_init_val = ops.convert_to_tensor(
            init(shape=v_shape, partition_info=partition_info))
        v._initializer_op = state_ops.assign(v, new_init_val)
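
A hypothetical call site for the function above; the variable, paths and sizes are all placeholders, and `_warm_start_var_with_vocab` is private TF API, so treat this as a sketch of the argument roles rather than sanctioned usage.

from tensorflow.python.ops import init_ops, variable_scope

# All names and paths below are illustrative placeholders.
weights = variable_scope.get_variable(
    name="linear_model/sc_vocab/weights", shape=[4, 1],
    initializer=init_ops.zeros_initializer())
_warm_start_var_with_vocab(
    weights,
    current_vocab_path="/tmp/new_vocab.txt",  # vocab backing `weights`
    current_vocab_size=4,
    prev_ckpt="/tmp/prev_model_dir",          # ckpt dir or explicit path
    prev_vocab_path="/tmp/old_vocab.txt",
    current_oov_buckets=0,                    # extra rows past the vocab
    axis=0)                                   # the vocab indexes rows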
Code example #8
def _warm_start_var_with_vocab(var,
                               current_vocab_path,
                               current_vocab_size,
                               prev_ckpt,
                               prev_vocab_path,
                               previous_vocab_size=-1,
                               current_oov_buckets=0,
                               prev_tensor_name=None,
                               initializer=None,
                               axis=0):
  """Warm-starts given variable from `prev_tensor_name` tensor in `prev_ckpt`.

  Use this method when `var` is backed by a vocabulary. This method stitches
  the given `var` such that values corresponding to individual features in the
  vocabulary remain consistent even when the order of the features changes
  between the old and new vocabularies.

  Args:
    var: Current graph's variable that needs to be warm-started (initialized).
      Can be either of the following:
      (i) `Variable`
      (ii) `ResourceVariable`
      (iii) list of `Variable`: The list must contain slices of the same larger
        variable.
      (iv) `PartitionedVariable`
    current_vocab_path: Path to the vocab file used for the given `var`.
    current_vocab_size: An `int` specifying the number of entries in the current
      vocab.
    prev_ckpt: A string specifying the directory with checkpoint file(s) or path
      to a checkpoint. The given checkpoint must have a tensor with name
      `prev_tensor_name` (if not None) or a tensor with the same name as the
      given `var`.
    prev_vocab_path: Path to the vocab file used for the tensor in `prev_ckpt`.
    previous_vocab_size: If provided, will constrain previous vocab to the first
      `previous_vocab_size` entries.  -1 means use the entire previous vocab.
    current_oov_buckets: An `int` specifying the number of out-of-vocabulary
      buckets used for given `var`.
    prev_tensor_name: Name of the tensor to look up in the provided
      `prev_ckpt`. If None, we look up a tensor with the same name as the
      given `var`.
    initializer: Variable initializer to be used for missing entries.  If None,
      missing entries will be zero-initialized.
    axis: Axis of the variable that the provided vocabulary corresponds to.

  Raises:
    ValueError: If required args are not provided.
  """
  if not (current_vocab_path and current_vocab_size and prev_ckpt and
          prev_vocab_path):
    raise ValueError("Invalid args: Must provide all of [current_vocab_path, "
                     "current_vocab_size, prev_ckpt, prev_vocab_path}.")
  if checkpoint_utils._is_variable(var):
    var = [var]
  elif (isinstance(var, list) and
        all(checkpoint_utils._is_variable(v) for v in var)):
    var = var
  elif isinstance(var, variables_lib.PartitionedVariable):
    var = var._get_variable_list()
  else:
    raise TypeError(
        "var MUST be one of the following: a Variable, list of Variable or "
        "PartitionedVariable, but is {}".format(type(var)))

  if not prev_tensor_name:
    # Assume tensor name remains the same.
    prev_tensor_name = _infer_var_name(var)

  # TODO(eddz): Fix functionality for rank-1 Variables (like FC biases).
  total_v_first_axis = sum([v.get_shape().as_list()[0] for v in var])
  for v in var:
    v_shape = v.get_shape().as_list()
    slice_info = v._get_save_slice_info()
    partition_info = None
    if slice_info:
      partition_info = variable_scope._PartitionInfo(
          full_shape=slice_info.full_shape,
          var_offset=slice_info.var_offset)

    if axis == 0:
      new_row_vocab_size = current_vocab_size
      new_col_vocab_size = v_shape[1]
      old_row_vocab_size = previous_vocab_size
      old_row_vocab_file = prev_vocab_path
      new_row_vocab_file = current_vocab_path
      old_col_vocab_file = None
      new_col_vocab_file = None
      num_row_oov_buckets = current_oov_buckets
      num_col_oov_buckets = 0
    elif axis == 1:
      # Note that we must compute this value across all partitions, whereas
      # in the axis = 0 case, we can simply use v_shape[1] because we don't
      # allow partitioning across axis = 1.
      new_row_vocab_size = total_v_first_axis
      new_col_vocab_size = current_vocab_size
      old_row_vocab_size = -1
      old_row_vocab_file = None
      new_row_vocab_file = None
      old_col_vocab_file = prev_vocab_path
      new_col_vocab_file = current_vocab_path
      num_row_oov_buckets = 0
      num_col_oov_buckets = current_oov_buckets
    else:
      raise ValueError("The only supported values for the axis argument are 0 "
                       "and 1.  Provided axis: {}".format(axis))

    init = checkpoint_ops._load_and_remap_matrix_initializer(
        ckpt_path=checkpoint_utils._get_checkpoint_filename(prev_ckpt),
        old_tensor_name=prev_tensor_name,
        new_row_vocab_size=new_row_vocab_size,
        new_col_vocab_size=new_col_vocab_size,
        old_row_vocab_size=old_row_vocab_size,
        old_row_vocab_file=old_row_vocab_file,
        new_row_vocab_file=new_row_vocab_file,
        old_col_vocab_file=old_col_vocab_file,
        new_col_vocab_file=new_col_vocab_file,
        num_row_oov_buckets=num_row_oov_buckets,
        num_col_oov_buckets=num_col_oov_buckets,
        initializer=initializer)
    new_init_val = ops.convert_to_tensor(
        init(shape=v_shape, partition_info=partition_info))
    v._initializer_op = state_ops.assign(v, new_init_val)
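
For the `axis=1` branch, the vocab indexes the columns (e.g. output-layer class weights); no row remapping happens, and the row count is summed across all shards since partitioning along axis 1 is not supported. A hypothetical call, placeholders throughout:

from tensorflow.python.ops import variable_scope

output_weights = variable_scope.get_variable(
    "dnn/logits/weights", shape=[5, 5])  # [num inputs, 4 classes + 1 OOV]
_warm_start_var_with_vocab(
    output_weights,
    current_vocab_path="/tmp/new_class_vocab.txt",
    current_vocab_size=4,
    prev_ckpt="/tmp/prev_ckpt",
    prev_vocab_path="/tmp/old_class_vocab.txt",
    current_oov_buckets=1,                    # OOV class columns
    axis=1)                                   # the vocab indexes columns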
Code example #9
def _warmstart_var_with_vocab(var,
                              current_vocab_path,
                              current_vocab_size,
                              prev_ckpt,
                              prev_vocab_path,
                              current_oov_buckets=0,
                              prev_tensor_name=None):
  """Warm-starts given variable from `prev_tensor_name` tensor in `prev_ckpt`.

  Use this method when `var` is backed by a vocabulary. This method stitches
  the given `var` such that values corresponding to individual features in the
  vocabulary remain consistent even when the order of the features changes
  between the old and new vocabularies.

  Args:
    var: Current graph's variable that needs to be warm-started (initialized).
      Can be either of the following:
      (i) `Variable`
      (ii) `ResourceVariable`
      (iii) list of `Variable`: The list must contain slices of the same larger
        variable.
      (iv) `PartitionedVariable`
    current_vocab_path: Path to the vocab file used for the given `var`.
    current_vocab_size: An `int` specifying the number of entries in the current
      vocab.
    prev_ckpt: A string specifying the directory with checkpoint file(s) or path
      to a checkpoint. The given checkpoint must have a tensor with name
      `prev_tensor_name` (if not None) or a tensor with the same name as the
      given `var`.
    prev_vocab_path: Path to the vocab file used for the tensor in `prev_ckpt`.
    current_oov_buckets: An `int` specifying the number of out-of-vocabulary
      buckets used for given `var`.
    prev_tensor_name: Name of the tensor to look up in the provided
      `prev_ckpt`. If None, we look up a tensor with the same name as the
      given `var`.

  Raises:
    ValueError: If required args are not provided.
  """
  if not (current_vocab_path and current_vocab_size and prev_ckpt and
          prev_vocab_path):
    raise ValueError("Invalid args: Must provide all of [current_vocab_path, "
                     "current_vocab_size, prev_ckpt, prev_vocab_path}.")
  if _is_variable(var):
    var = [var]
  elif isinstance(var, list) and all(_is_variable(v) for v in var):
    var = var
  elif isinstance(var, variables.PartitionedVariable):
    var = var._get_variable_list()
  else:
    raise TypeError(
        "var MUST be one of the following: a Variable, list of Variable or "
        "PartitionedVariable, but is {}".format(type(var)))

  if not prev_tensor_name:
    # Assume tensor name remains the same.
    prev_tensor_name = _infer_var_name(var)

  for v in var:
    v_shape = v.get_shape().as_list()
    slice_info = v._get_save_slice_info()
    partition_info = None
    if slice_info:
      partition_info = variable_scope._PartitionInfo(
          full_shape=slice_info.full_shape,
          var_offset=slice_info.var_offset)

    # TODO(vihanjain): This is brittle. Can we instead infer actual initializer
    # used originally for the variable or use a fixed initializer?
    def _missing_ids_init(shape, dtype=None):
      # pylint: disable=cell-var-from-loop
      if dtype and dtype.base_dtype != v.dtype.base_dtype:
        raise ValueError("Trying to initialize missing ids with a different "
                         "dtype `{}` than variable's dtype `{}`".format(
                             dtype, v.dtype))
      return array_ops.slice(v.initial_value, [0, 0], shape)
      # pylint: enable=cell-var-from-loop

    # TODO(vihanjain): Support _WarmstartSettings where class vocabularies need
    # remapping too.
    init = checkpoint_ops._load_and_remap_matrix_initializer(
        ckpt_path=saver.latest_checkpoint(prev_ckpt),
        old_tensor_name=prev_tensor_name,
        new_row_vocab_size=current_vocab_size,
        new_col_vocab_size=v_shape[1],
        old_row_vocab_file=prev_vocab_path,
        new_row_vocab_file=current_vocab_path,
        old_col_vocab_file=None,
        new_col_vocab_file=None,
        num_row_oov_buckets=current_oov_buckets,
        num_col_oov_buckets=0,
        initializer=_missing_ids_init)
    new_init_val = ops.convert_to_tensor(
        init(shape=v_shape, partition_info=partition_info))
    v._initializer_op = state_ops.assign(v, new_init_val)
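
The `_missing_ids_init` closure above fills missing ids with `array_ops.slice(v.initial_value, [0, 0], shape)`, i.e. the top-left block of whatever the variable's own initializer produced; one reason the TODO calls this brittle is that the block is always anchored at [0, 0], regardless of which ids are actually missing. A NumPy picture of that slice (values illustrative):

import numpy as np

initial_value = np.arange(20.0).reshape(5, 4)  # stand-in for v.initial_value
missing_shape = (2, 4)  # shape requested for the missing ids
filled = initial_value[:missing_shape[0], :missing_shape[1]]
print(filled)  # always the top-left block of the initial value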