def test_load_and_remap_linear_multiclass_initializer_default_init(self):
  """Tests where the zeros_initializer default is used for linear."""
  # No `initializer` argument is passed, so every entry of the expected
  # matrix that is not remapped from the checkpoint is zero.
  loading_initializer = (
      checkpoint_ops._load_and_remap_matrix_initializer(
          new_row_vocab_size=5,
          new_col_vocab_file=self.new_class_vocab_file,
          old_col_vocab_file=self.old_class_vocab_file,
          new_col_vocab_size=4,
          old_tensor_name='some_scope/embeddings',
          ckpt_path=[self.checkpoint_file],
          new_row_vocab_file=self.new_feature_vocab_file,
          old_row_vocab_file=self.old_feature_vocab_file,
          num_row_oov_buckets=1,
          num_col_oov_buckets=1))

  # Each reshaped vector is one column of the expected [6, 5] matrix:
  # [5 feature vocab + 1 feature OOV, 4 class vocab + 1 class OOV].
  expected_remapped_matrix = np.concatenate(
      [
          np.reshape([2, 18, 34, 50, 0, 0], [6, 1]),
          np.reshape([0, 16, 32, 48, 0, 0], [6, 1]),
          np.reshape([0] * 6, [6, 1]),
          np.reshape([1, 17, 33, 49, 0, 0], [6, 1]),
          np.reshape([0] * 6, [6, 1])
      ],
      axis=1)

  # The variable is split into two partitions by the partitioner.
  remapped_matrix = variable_scope.get_variable(
      name='linear_init_fallback/obtained_weight_matrix',
      shape=[6, 5],
      initializer=loading_initializer,
      partitioner=partitioned_variables.fixed_size_partitioner(2))

  # `cached_session` is used instead of the deprecated `test_session`.
  with self.cached_session():
    variables.global_variables_initializer().run()
    self.assertAllClose(expected_remapped_matrix,
                        remapped_matrix.as_tensor().eval())
def test_load_and_remap_output_layer_weight_initializer_dnn_output(self):
  """Tests for the output layer initializer in the DNN output case."""
  # Only class (column) vocab files are passed here; no row vocab files or
  # row OOV buckets are given.
  loading_initializer = (
      checkpoint_ops._load_and_remap_matrix_initializer(
          new_row_vocab_size=5,
          new_col_vocab_file=self.new_class_vocab_file,
          old_col_vocab_file=self.old_class_vocab_file,
          new_col_vocab_size=4,
          old_tensor_name='some_scope/embeddings',
          ckpt_path=[self.checkpoint_file],
          num_col_oov_buckets=1,
          initializer=self.initializer))

  # Each reshaped vector is one column of the expected matrix; columns that
  # are entirely `self.init_val` carry no value from the checkpoint.
  expected_remapped_matrix = np.concatenate(
      [
          np.reshape([2, 18, 34, 50, 66], [5, 1]),
          np.reshape([0, 16, 32, 48, 64], [5, 1]),
          np.reshape([self.init_val] * 5, [5, 1]),
          np.reshape([1, 17, 33, 49, 65], [5, 1]),
          np.reshape([self.init_val] * 5, [5, 1])
      ],
      axis=1)

  # The new weight matrix is of size
  # [5-sized input layer, 4 class vocab + 1 class OOV].
  remapped_matrix = variable_scope.get_variable(
      name='dnn_output/obtained_weight_matrix',
      shape=[5, 5],
      initializer=loading_initializer,
      partitioner=partitioned_variables.fixed_size_partitioner(2))

  # `cached_session` is used instead of the deprecated `test_session`.
  with self.cached_session():
    variables.global_variables_initializer().run()
    self.assertAllClose(expected_remapped_matrix,
                        remapped_matrix.as_tensor().eval())
def test_load_and_remap_linear_multiclass_initializer_default_init(self):
  """Checks the linear case when no explicit initializer is supplied."""
  remap_initializer = checkpoint_ops._load_and_remap_matrix_initializer(
      new_row_vocab_size=5,
      new_col_vocab_file=self.new_class_vocab_file,
      old_col_vocab_file=self.old_class_vocab_file,
      new_col_vocab_size=4,
      old_tensor_name='some_scope/embeddings',
      ckpt_path=[self.checkpoint_file],
      new_row_vocab_file=self.new_feature_vocab_file,
      old_row_vocab_file=self.old_feature_vocab_file,
      num_row_oov_buckets=1,
      num_col_oov_buckets=1)

  # Expected [6, 5] matrix, written one column per inner list:
  # [5 feature vocab + 1 feature OOV, 4 class vocab + 1 class OOV].
  # Entries that are not remapped from the checkpoint are expected to be
  # zero because no initializer was passed above.
  expected_columns = [
      [2, 18, 34, 50, 0, 0],
      [0, 16, 32, 48, 0, 0],
      [0] * 6,
      [1, 17, 33, 49, 0, 0],
      [0] * 6,
  ]
  expected = np.column_stack(expected_columns)

  two_way_partitioner = partitioned_variables.fixed_size_partitioner(2)
  remapped_var = variable_scope.get_variable(
      name='linear_init_fallback/obtained_weight_matrix',
      shape=[6, 5],
      initializer=remap_initializer,
      partitioner=two_way_partitioner)

  with self.cached_session():
    variables.global_variables_initializer().run()
    actual = remapped_var.as_tensor().eval()
    self.assertAllClose(expected, actual)
def test_load_and_remap_output_layer_weight_initializer_dnn_output(self):
  """Checks the output-layer initializer for the DNN output case."""
  remap_initializer = checkpoint_ops._load_and_remap_matrix_initializer(
      new_row_vocab_size=5,
      new_col_vocab_file=self.new_class_vocab_file,
      old_col_vocab_file=self.old_class_vocab_file,
      new_col_vocab_size=4,
      old_tensor_name='some_scope/embeddings',
      ckpt_path=[self.checkpoint_file],
      num_col_oov_buckets=1,
      initializer=self.initializer)

  # Expected weights, written one column per inner list. The matrix is of
  # size [5-sized input layer, 4 class vocab + 1 class OOV]; columns filled
  # with `self.init_val` carry no value from the checkpoint.
  fill_column = [self.init_val] * 5
  expected = np.column_stack([
      [2, 18, 34, 50, 66],
      [0, 16, 32, 48, 64],
      fill_column,
      [1, 17, 33, 49, 65],
      fill_column,
  ])

  two_way_partitioner = partitioned_variables.fixed_size_partitioner(2)
  remapped_var = variable_scope.get_variable(
      name='dnn_output/obtained_weight_matrix',
      shape=[5, 5],
      initializer=remap_initializer,
      partitioner=two_way_partitioner)

  with self.cached_session():
    variables.global_variables_initializer().run()
    actual = remapped_var.as_tensor().eval()
    self.assertAllClose(expected, actual)
def test_initializer_with_oov_only_partition(self):
  """Tests for the output layer initializer where one partition is all OOV."""
  loading_initializer = (
      checkpoint_ops._load_and_remap_matrix_initializer(
          new_row_vocab_size=5,
          new_col_vocab_file=self.new_class_vocab_file,
          old_col_vocab_file=self.old_class_vocab_file,
          new_col_vocab_size=4,
          old_tensor_name='some_scope/embeddings',
          ckpt_path=[self.checkpoint_file],
          new_row_vocab_file=self.new_feature_vocab_file,
          old_row_vocab_file=self.old_feature_vocab_file,
          num_row_oov_buckets=5,
          num_col_oov_buckets=1,
          initializer=self.initializer))

  # Each reshaped vector is one column of the expected [10, 5] matrix. Only
  # the first four rows of three columns hold checkpoint values; everything
  # else is expected to equal `self.init_val`.
  expected_remapped_matrix = np.concatenate(
      [
          np.reshape([2, 18, 34, 50] + [self.init_val] * 6, [10, 1]),
          np.reshape([0, 16, 32, 48] + [self.init_val] * 6, [10, 1]),
          np.reshape([self.init_val] * 10, [10, 1]),
          np.reshape([1, 17, 33, 49] + [self.init_val] * 6, [10, 1]),
          np.reshape([self.init_val] * 10, [10, 1]),
      ],
      axis=1)

  # The new weight matrix is of size
  # [5 feature vocab + 5 feature OOV, 4 class vocab + 1 class OOV]. The
  # second partition has only OOV.
  remapped_matrix = variable_scope.get_variable(
      name='linear_all_oov/obtained_weight_matrix',
      shape=[10, 5],
      initializer=loading_initializer,
      partitioner=partitioned_variables.fixed_size_partitioner(2))

  # `cached_session` is used instead of the deprecated `test_session`.
  with self.cached_session():
    variables.global_variables_initializer().run()
    self.assertAllClose(expected_remapped_matrix,
                        remapped_matrix.as_tensor().eval())
def test_load_and_remap_output_layer_weight_initializer_linear(self):
  """Tests for the output layer initializer in the linear multi-class case."""
  loading_initializer = (
      checkpoint_ops._load_and_remap_matrix_initializer(
          new_row_vocab_size=5,
          new_col_vocab_file=self.new_class_vocab_file,
          old_col_vocab_file=self.old_class_vocab_file,
          new_col_vocab_size=4,
          old_tensor_name='some_scope/embeddings',
          ckpt_path=[self.checkpoint_file],
          new_row_vocab_file=self.new_feature_vocab_file,
          old_row_vocab_file=self.old_feature_vocab_file,
          num_row_oov_buckets=1,
          num_col_oov_buckets=1,
          initializer=self.initializer))

  # Each reshaped vector is one column of the expected [6, 5] matrix;
  # entries without a checkpoint value are expected to be `self.init_val`.
  expected_remapped_matrix = np.concatenate(
      [
          np.reshape([2, 18, 34, 50, self.init_val, self.init_val], [6, 1]),
          np.reshape([0, 16, 32, 48, self.init_val, self.init_val], [6, 1]),
          np.reshape([self.init_val] * 6, [6, 1]),
          np.reshape([1, 17, 33, 49, self.init_val, self.init_val], [6, 1]),
          np.reshape([self.init_val] * 6, [6, 1])
      ],
      axis=1)

  # The new weight matrix is of size
  # [5 feature vocab + 1 feature OOV, 4 class vocab + 1 class OOV].  Use a
  # partitioned variable to confirm that the offset logic works.
  remapped_matrix = variable_scope.get_variable(
      name='linear/obtained_weight_matrix',
      shape=[6, 5],
      initializer=loading_initializer,
      partitioner=partitioned_variables.fixed_size_partitioner(2))

  # `cached_session` is used instead of the deprecated `test_session`.
  with self.cached_session():
    variables.global_variables_initializer().run()
    self.assertAllClose(expected_remapped_matrix,
                        remapped_matrix.as_tensor().eval())
def _warm_start_var_with_vocab(var,
                               current_vocab_path,
                               current_vocab_size,
                               prev_ckpt,
                               prev_vocab_path,
                               previous_vocab_size=-1,
                               current_oov_buckets=0,
                               prev_tensor_name=None,
                               initializer=None,
                               axis=0):
  """Warm-starts given variable from `prev_tensor_name` tensor in `prev_ckpt`.

  Use this method when the `var` is backed by vocabulary. This method stitches
  the given `var` such that values corresponding to individual features in the
  vocabulary remain consistent irrespective of changing order of the features
  between old and new vocabularies.

  For every variable slice, the slice's `_initializer_op` is replaced by an
  assign op whose value comes from the remapping initializer.

  Args:
    var: Current graph's variable that needs to be warm-started (initialized).
      Can be either of the following:
      (i) `Variable`
      (ii) `ResourceVariable`
      (iii) list of `Variable`: The list must contain slices of the same larger
        variable.
      (iv) `PartitionedVariable`
    current_vocab_path: Path to the vocab file used for the given `var`.
    current_vocab_size: An `int` specifying the number of entries in the current
      vocab.
    prev_ckpt: A string specifying the directory with checkpoint file(s) or path
      to checkpoint. The given checkpoint must have tensor with name
      `prev_tensor_name` (if not None) or tensor with name same as given `var`.
    prev_vocab_path: Path to the vocab file used for the tensor in `prev_ckpt`.
    previous_vocab_size: If provided, will constrain previous vocab to the first
      `previous_vocab_size` entries.  -1 means use the entire previous vocab.
    current_oov_buckets: An `int` specifying the number of out-of-vocabulary
      buckets used for given `var`.
    prev_tensor_name: Name of the tensor to lookup in provided `prev_ckpt`. If
      None, we lookup tensor with same name as given `var`.
    initializer: Variable initializer to be used for missing entries.  If None,
      missing entries will be zero-initialized.
    axis: Axis of the variable that the provided vocabulary corresponds to.
      Must be 0 or 1.

  Raises:
    ValueError: If required args are not provided, or if `axis` is neither 0
      nor 1.
    TypeError: If `var` is not a Variable, a list of Variable or a
      PartitionedVariable.
  """
  if not (current_vocab_path and current_vocab_size and prev_ckpt and
          prev_vocab_path):
    raise ValueError("Invalid args: Must provide all of [current_vocab_path, "
                     "current_vocab_size, prev_ckpt, prev_vocab_path].")

  # Normalize `var` to a list of variable slices.
  if checkpoint_utils._is_variable(var):
    var = [var]
  elif (isinstance(var, list) and
        all(checkpoint_utils._is_variable(v) for v in var)):
    pass  # Already a list of variable slices.
  elif isinstance(var, variables_lib.PartitionedVariable):
    var = var._get_variable_list()
  else:
    raise TypeError(
        "var MUST be one of the following: a Variable, list of Variable or "
        "PartitionedVariable, but is {}".format(type(var)))

  if not prev_tensor_name:
    # Assume tensor name remains the same.
    prev_tensor_name = _infer_var_name(var)

  # TODO(eddz): Fix functionality for rank-1 Variables (like FC biases).
  total_v_first_axis = sum(v.get_shape().as_list()[0] for v in var)

  # Validate `axis` once, before any slice is touched, rather than on every
  # loop iteration.
  if axis not in (0, 1):
    raise ValueError("The only supported values for the axis argument are 0 "
                     "and 1. Provided axis: {}".format(axis))

  # Resolve the checkpoint once so that every slice is remapped from the same
  # checkpoint file and the lookup is not repeated per slice.
  ckpt_path = checkpoint_utils._get_checkpoint_filename(prev_ckpt)

  for v in var:
    v_shape = v.get_shape().as_list()
    slice_info = v._get_save_slice_info()
    partition_info = None
    if slice_info:
      partition_info = variable_scope._PartitionInfo(
          full_shape=slice_info.full_shape, var_offset=slice_info.var_offset)

    if axis == 0:
      new_row_vocab_size = current_vocab_size
      new_col_vocab_size = v_shape[1]
      old_row_vocab_size = previous_vocab_size
      old_row_vocab_file = prev_vocab_path
      new_row_vocab_file = current_vocab_path
      old_col_vocab_file = None
      new_col_vocab_file = None
      num_row_oov_buckets = current_oov_buckets
      num_col_oov_buckets = 0
    else:  # axis == 1; any other value was rejected above.
      # Note that we must compute this value across all partitions, whereas
      # in the axis = 0 case, we can simply use v_shape[1] because we don't
      # allow partitioning across axis = 1.
      new_row_vocab_size = total_v_first_axis
      new_col_vocab_size = current_vocab_size
      old_row_vocab_size = -1
      old_row_vocab_file = None
      new_row_vocab_file = None
      old_col_vocab_file = prev_vocab_path
      new_col_vocab_file = current_vocab_path
      num_row_oov_buckets = 0
      num_col_oov_buckets = current_oov_buckets

    init = checkpoint_ops._load_and_remap_matrix_initializer(
        ckpt_path=ckpt_path,
        old_tensor_name=prev_tensor_name,
        new_row_vocab_size=new_row_vocab_size,
        new_col_vocab_size=new_col_vocab_size,
        old_row_vocab_size=old_row_vocab_size,
        old_row_vocab_file=old_row_vocab_file,
        new_row_vocab_file=new_row_vocab_file,
        old_col_vocab_file=old_col_vocab_file,
        new_col_vocab_file=new_col_vocab_file,
        num_row_oov_buckets=num_row_oov_buckets,
        num_col_oov_buckets=num_col_oov_buckets,
        initializer=initializer)
    new_init_val = ops.convert_to_tensor(
        init(shape=v_shape, partition_info=partition_info))
    # Replace the slice's initializer with an assignment of the remapped
    # values.
    v._initializer_op = state_ops.assign(v, new_init_val)
def _warm_start_var_with_vocab(var,
                               current_vocab_path,
                               current_vocab_size,
                               prev_ckpt,
                               prev_vocab_path,
                               previous_vocab_size=-1,
                               current_oov_buckets=0,
                               prev_tensor_name=None,
                               initializer=None,
                               axis=0):
  """Warm-starts given variable from `prev_tensor_name` tensor in `prev_ckpt`.

  Use this method when the `var` is backed by vocabulary. This method stitches
  the given `var` such that values corresponding to individual features in the
  vocabulary remain consistent irrespective of changing order of the features
  between old and new vocabularies.

  For every variable slice, the slice's `_initializer_op` is replaced by an
  assign op whose value comes from the remapping initializer.

  Args:
    var: Current graph's variable that needs to be warm-started (initialized).
      Can be either of the following:
      (i) `Variable`
      (ii) `ResourceVariable`
      (iii) list of `Variable`: The list must contain slices of the same larger
        variable.
      (iv) `PartitionedVariable`
    current_vocab_path: Path to the vocab file used for the given `var`.
    current_vocab_size: An `int` specifying the number of entries in the current
      vocab.
    prev_ckpt: A string specifying the directory with checkpoint file(s) or path
      to checkpoint. The given checkpoint must have tensor with name
      `prev_tensor_name` (if not None) or tensor with name same as given `var`.
    prev_vocab_path: Path to the vocab file used for the tensor in `prev_ckpt`.
    previous_vocab_size: If provided, will constrain previous vocab to the first
      `previous_vocab_size` entries.  -1 means use the entire previous vocab.
    current_oov_buckets: An `int` specifying the number of out-of-vocabulary
      buckets used for given `var`.
    prev_tensor_name: Name of the tensor to lookup in provided `prev_ckpt`. If
      None, we lookup tensor with same name as given `var`.
    initializer: Variable initializer to be used for missing entries.  If None,
      missing entries will be zero-initialized.
    axis: Axis of the variable that the provided vocabulary corresponds to.
      Must be 0 or 1.

  Raises:
    ValueError: If required args are not provided, or if `axis` is neither 0
      nor 1.
    TypeError: If `var` is not a Variable, a list of Variable or a
      PartitionedVariable.
  """
  required_args = (current_vocab_path, current_vocab_size, prev_ckpt,
                   prev_vocab_path)
  if not all(required_args):
    raise ValueError("Invalid args: Must provide all of [current_vocab_path, "
                     "current_vocab_size, prev_ckpt, prev_vocab_path].")

  # Normalize `var` to a list of variable slices.
  if checkpoint_utils._is_variable(var):
    var = [var]
  elif (isinstance(var, list) and
        all(checkpoint_utils._is_variable(v) for v in var)):
    pass  # Already a list of variable slices.
  elif isinstance(var, variables_lib.PartitionedVariable):
    var = var._get_variable_list()
  else:
    raise TypeError(
        "var MUST be one of the following: a Variable, list of Variable or "
        "PartitionedVariable, but is {}".format(type(var)))

  if not prev_tensor_name:
    # Assume tensor name remains the same.
    prev_tensor_name = _infer_var_name(var)

  # TODO(eddz): Fix functionality for rank-1 Variables (like FC biases).
  # A generator is enough here; no intermediate list is needed for `sum`.
  total_v_first_axis = sum(v.get_shape().as_list()[0] for v in var)

  # Validate `axis` once, before any slice is touched, rather than on every
  # loop iteration.
  if axis not in (0, 1):
    raise ValueError("The only supported values for the axis argument are 0 "
                     "and 1. Provided axis: {}".format(axis))

  # Resolve the checkpoint once so that every slice is remapped from the same
  # checkpoint file and the lookup is not repeated per slice.
  ckpt_path = checkpoint_utils._get_checkpoint_filename(prev_ckpt)

  for v in var:
    v_shape = v.get_shape().as_list()
    slice_info = v._get_save_slice_info()
    partition_info = None
    if slice_info:
      partition_info = variable_scope._PartitionInfo(
          full_shape=slice_info.full_shape, var_offset=slice_info.var_offset)

    if axis == 0:
      new_row_vocab_size = current_vocab_size
      new_col_vocab_size = v_shape[1]
      old_row_vocab_size = previous_vocab_size
      old_row_vocab_file = prev_vocab_path
      new_row_vocab_file = current_vocab_path
      old_col_vocab_file = None
      new_col_vocab_file = None
      num_row_oov_buckets = current_oov_buckets
      num_col_oov_buckets = 0
    else:  # axis == 1; any other value was rejected above.
      # Note that we must compute this value across all partitions, whereas
      # in the axis = 0 case, we can simply use v_shape[1] because we don't
      # allow partitioning across axis = 1.
      new_row_vocab_size = total_v_first_axis
      new_col_vocab_size = current_vocab_size
      old_row_vocab_size = -1
      old_row_vocab_file = None
      new_row_vocab_file = None
      old_col_vocab_file = prev_vocab_path
      new_col_vocab_file = current_vocab_path
      num_row_oov_buckets = 0
      num_col_oov_buckets = current_oov_buckets

    init = checkpoint_ops._load_and_remap_matrix_initializer(
        ckpt_path=ckpt_path,
        old_tensor_name=prev_tensor_name,
        new_row_vocab_size=new_row_vocab_size,
        new_col_vocab_size=new_col_vocab_size,
        old_row_vocab_size=old_row_vocab_size,
        old_row_vocab_file=old_row_vocab_file,
        new_row_vocab_file=new_row_vocab_file,
        old_col_vocab_file=old_col_vocab_file,
        new_col_vocab_file=new_col_vocab_file,
        num_row_oov_buckets=num_row_oov_buckets,
        num_col_oov_buckets=num_col_oov_buckets,
        initializer=initializer)
    new_init_val = ops.convert_to_tensor(
        init(shape=v_shape, partition_info=partition_info))
    # Replace the slice's initializer with an assignment of the remapped
    # values.
    v._initializer_op = state_ops.assign(v, new_init_val)
def _warmstart_var_with_vocab(var,
                              current_vocab_path,
                              current_vocab_size,
                              prev_ckpt,
                              prev_vocab_path,
                              current_oov_buckets=0,
                              prev_tensor_name=None):
  """Warm-starts given variable from `prev_tensor_name` tensor in `prev_ckpt`.

  Use this method when the `var` is backed by vocabulary. This method stitches
  the given `var` such that values corresponding to individual features in the
  vocabulary remain consistent irrespective of changing order of the features
  between old and new vocabularies.

  For every variable slice, the slice's `_initializer_op` is replaced by an
  assign op whose value comes from the remapping initializer. Missing ids are
  filled from a slice of that variable's own `initial_value`.

  Args:
    var: Current graph's variable that needs to be warm-started (initialized).
      Can be either of the following:
      (i) `Variable`
      (ii) `ResourceVariable`
      (iii) list of `Variable`: The list must contain slices of the same larger
        variable.
      (iv) `PartitionedVariable`
    current_vocab_path: Path to the vocab file used for the given `var`.
    current_vocab_size: An `int` specifying the number of entries in the current
      vocab.
    prev_ckpt: A string specifying the directory with checkpoint file(s) or path
      to checkpoint. The given checkpoint must have tensor with name
      `prev_tensor_name` (if not None) or tensor with name same as given `var`.
    prev_vocab_path: Path to the vocab file used for the tensor in `prev_ckpt`.
    current_oov_buckets: An `int` specifying the number of out-of-vocabulary
      buckets used for given `var`.
    prev_tensor_name: Name of the tensor to lookup in provided `prev_ckpt`. If
      None, we lookup tensor with same name as given `var`.

  Raises:
    ValueError: If required args are not provided.
    TypeError: If `var` is not a Variable, a list of Variable or a
      PartitionedVariable.
  """
  if not (current_vocab_path and current_vocab_size and prev_ckpt and
          prev_vocab_path):
    raise ValueError("Invalid args: Must provide all of [current_vocab_path, "
                     "current_vocab_size, prev_ckpt, prev_vocab_path].")

  # Normalize `var` to a list of variable slices.
  if _is_variable(var):
    var = [var]
  elif isinstance(var, list) and all(_is_variable(v) for v in var):
    pass  # Already a list of variable slices.
  elif isinstance(var, variables.PartitionedVariable):
    var = var._get_variable_list()
  else:
    raise TypeError(
        "var MUST be one of the following: a Variable, list of Variable or "
        "PartitionedVariable, but is {}".format(type(var)))

  if not prev_tensor_name:
    # Assume tensor name remains the same.
    prev_tensor_name = _infer_var_name(var)

  # TODO(vihanjain): This is brittle. Can we instead infer actual initializer
  # used originally for the variable or use a fixed initializer?
  def _make_missing_ids_init(variable):
    """Returns a missing-ids initializer bound to `variable`.

    Binding through a factory argument (rather than closing over the loop
    variable) guarantees the initializer always refers to the slice it was
    created for, no matter when it is invoked.
    """

    def _missing_ids_init(shape, dtype=None):
      if dtype and dtype.base_dtype != variable.dtype.base_dtype:
        raise ValueError("Trying to initialize missing ids with a different "
                         "dtype `{}` than variable's dtype `{}`".format(
                             dtype, variable.dtype))
      return array_ops.slice(variable.initial_value, [0, 0], shape)

    return _missing_ids_init

  # Resolve the checkpoint once so that every slice is remapped from the same
  # checkpoint file and the lookup is not repeated per slice.
  ckpt_path = saver.latest_checkpoint(prev_ckpt)

  for v in var:
    v_shape = v.get_shape().as_list()
    slice_info = v._get_save_slice_info()
    partition_info = None
    if slice_info:
      partition_info = variable_scope._PartitionInfo(
          full_shape=slice_info.full_shape, var_offset=slice_info.var_offset)

    # TODO(vihanjain): Support _WarmstartSettings where class vocabularies need
    # remapping too.
    init = checkpoint_ops._load_and_remap_matrix_initializer(
        ckpt_path=ckpt_path,
        old_tensor_name=prev_tensor_name,
        new_row_vocab_size=current_vocab_size,
        new_col_vocab_size=v_shape[1],
        old_row_vocab_file=prev_vocab_path,
        new_row_vocab_file=current_vocab_path,
        old_col_vocab_file=None,
        new_col_vocab_file=None,
        num_row_oov_buckets=current_oov_buckets,
        num_col_oov_buckets=0,
        initializer=_make_missing_ids_init(v))
    new_init_val = ops.convert_to_tensor(
        init(shape=v_shape, partition_info=partition_info))
    # Replace the slice's initializer with an assignment of the remapped
    # values.
    v._initializer_op = state_ops.assign(v, new_init_val)