def testErrorConditions(self): self.assertRaises(ValueError, ws_util._WarmStartSettings, None) x = variable_scope.get_variable( "x", shape=[4, 1], initializer=ones(), partitioner=lambda shape, dtype: [2, 1]) # List of PartitionedVariable is invalid type when warmstarting with vocab. self.assertRaises(TypeError, ws_util._warmstart_var_with_vocab, [x], "/tmp", 5, "/tmp", "/tmp") # Keys of type other than FeatureColumn. self.assertRaises(TypeError, ws_util._warmstart, {"StringType": x}, ws_util._WarmStartSettings("/tmp")) # Unused variable names raises ValueError. with ops.Graph().as_default(): with self.test_session() as sess: x = variable_scope.get_variable( "x", shape=[4, 1], initializer=ones(), partitioner=lambda shape, dtype: [2, 1]) self._write_checkpoint(sess) self.assertRaises(ValueError, ws_util._warmstart, ws_util._WarmStartSettings( self.get_temp_dir(), var_name_to_vocab_info={ "y": ws_util._VocabInfo("", 1, 0, "") })) self.assertRaises(ValueError, ws_util._warmstart, ws_util._WarmStartSettings( self.get_temp_dir(), var_name_to_prev_var_name={"y": "y2"}))
def testWarmStartInputLayer_SparseColumnIntegerized(self): # Create feature column. sc_int = fc.categorical_column_with_identity("sc_int", num_buckets=10) # Save checkpoint from which to warm-start. _, prev_int_val = self._create_prev_run_var( "linear_model/sc_int/weights", shape=[10, 1], initializer=ones()) # Verify we initialized the values correctly. self.assertAllEqual(np.ones([10, 1]), prev_int_val) partitioner = lambda shape, dtype: [1] * len(shape) # New graph, new session WITHOUT warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_int], partitioner) sess.run(variables.global_variables_initializer()) # Without warmstarting, the weights should be initialized using default # initializer (which is init_ops.zeros_initializer). self._assert_cols_to_vars(cols_to_vars, {sc_int: [np.zeros([10, 1])]}, sess) # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_int], partitioner) ws_util._warmstart_input_layer(cols_to_vars, ws_util._WarmStartSettings( self.get_temp_dir())) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. self._assert_cols_to_vars(cols_to_vars, {sc_int: [prev_int_val]}, sess)
def testWarmStart_SparseColumnHashed(self): # Create feature column. sc_hash = fc.categorical_column_with_hash_bucket( "sc_hash", hash_bucket_size=15) # Save checkpoint from which to warm-start. _, prev_hash_val = self._create_prev_run_var( "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms()) partitioner = lambda shape, dtype: [1] * len(shape) # New graph, new session WITHOUT warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_hash], partitioner) sess.run(variables.global_variables_initializer()) # Without warmstarting, the weights should be initialized using default # initializer (which is init_ops.zeros_initializer). self._assert_cols_to_vars(cols_to_vars, {sc_hash: [np.zeros([15, 1])]}, sess) # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_hash], partitioner) ws_util._warmstart(ws_util._WarmStartSettings( self.get_temp_dir(), vars_to_warmstart=".*sc_hash.*")) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. self._assert_cols_to_vars(cols_to_vars, {sc_hash: [prev_hash_val]}, sess)
def testWarmStart_BucketizedColumn(self): # Create feature column. real = fc.numeric_column("real") real_bucket = fc.bucketized_column(real, boundaries=[0., 1., 2., 3.]) # Save checkpoint from which to warm-start. _, prev_bucket_val = self._create_prev_run_var( "linear_model/real_bucketized/weights", shape=[5, 1], initializer=norms()) partitioner = lambda shape, dtype: [1] * len(shape) # New graph, new session WITHOUT warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([real_bucket], partitioner) sess.run(variables.global_variables_initializer()) # Without warmstarting, the weights should be initialized using default # initializer (which is init_ops.zeros_initializer). self._assert_cols_to_vars(cols_to_vars, {real_bucket: [np.zeros([5, 1])]}, sess) # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([real_bucket], partitioner) ws_util._warmstart(ws_util._WarmStartSettings( self.get_temp_dir(), vars_to_warmstart=".*real_bucketized.*")) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. self._assert_cols_to_vars(cols_to_vars, {real_bucket: [prev_bucket_val]}, sess)
def testWarmStartInputLayer_SparseColumnIntegerized(self): # Create feature column. sc_int = fc.categorical_column_with_identity("sc_int", num_buckets=10) # Save checkpoint from which to warm-start. _, prev_int_val = self._create_prev_run_var( "linear_model/sc_int/weights", shape=[10, 1], initializer=ones()) # Verify we initialized the values correctly. self.assertAllEqual(np.ones([10, 1]), prev_int_val) partitioner = lambda shape, dtype: [1] * len(shape) # New graph, new session WITHOUT warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_int], partitioner) sess.run(variables.global_variables_initializer()) # Without warmstarting, the weights should be initialized using default # initializer (which is init_ops.zeros_initializer). self._assert_cols_to_vars(cols_to_vars, {sc_int: [np.zeros([10, 1])]}, sess) # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_int], partitioner) ws_util._warmstart_input_layer( cols_to_vars, ws_util._WarmStartSettings(self.get_temp_dir())) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. self._assert_cols_to_vars(cols_to_vars, {sc_int: [prev_int_val]}, sess)
def testWarmStartMoreSettingsNoPartitioning(self): # Create old and new vocabs for sparse column "sc_vocab". prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"], "old_vocab") new_vocab_path = self._write_vocab( ["orange", "guava", "banana", "apple", "raspberry", "blueberry"], "new_vocab") # Create feature columns. sc_hash = fc.categorical_column_with_hash_bucket( "sc_hash", hash_bucket_size=15) sc_keys = fc.categorical_column_with_vocabulary_list( "sc_keys", vocabulary_list=["a", "b", "c", "e"]) sc_vocab = fc.categorical_column_with_vocabulary_file( "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6) all_linear_cols = [sc_hash, sc_keys, sc_vocab] # Save checkpoint from which to warm-start. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: variable_scope.get_variable( "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms()) sc_keys_weights = variable_scope.get_variable( "some_other_name", shape=[4, 1], initializer=rand()) variable_scope.get_variable( "linear_model/sc_vocab/weights", initializer=[[0.5], [1.], [2.], [3.]]) self._write_checkpoint(sess) prev_keys_val = sess.run(sc_keys_weights) # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model(all_linear_cols, partitioner=None) vocab_info = ws_util._VocabInfo( new_vocab=sc_vocab.vocabulary_file, new_vocab_size=sc_vocab.vocabulary_size, num_oov_buckets=sc_vocab.num_oov_buckets, old_vocab=prev_vocab_path ) ws_settings = ws_util._WarmStartSettings( self.get_temp_dir(), vars_to_warmstart=".*(sc_keys|sc_vocab).*", var_name_to_vocab_info={ ws_util._infer_var_name(cols_to_vars[sc_vocab]): vocab_info }, var_name_to_prev_var_name={ ws_util._infer_var_name(cols_to_vars[sc_keys]): "some_other_name" }) ws_util._warmstart(ws_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. Var corresponding to # sc_hash should not be warm-started. Var corresponding to sc_vocab # should be correctly warmstarted after vocab remapping. self._assert_cols_to_vars(cols_to_vars, { sc_keys: [prev_keys_val], sc_hash: [np.zeros([15, 1])], sc_vocab: [np.array([[3.], [2.], [1.], [0.5], [0.], [0.]])] }, sess)
def testWarmStartInputLayerEmbeddingColumn(self): # Create old and new vocabs for embedding column "sc_vocab". prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"], "old_vocab") new_vocab_path = self._write_vocab( ["orange", "guava", "banana", "apple", "raspberry", "blueberry"], "new_vocab") # Save checkpoint from which to warm-start. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: _ = variable_scope.get_variable( "input_layer/sc_vocab_embedding/embedding_weights", initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]]) self._write_checkpoint(sess) def _partitioner(shape, dtype): # pylint:disable=unused-argument # Partition each var into 2 equal slices. partitions = [1] * len(shape) partitions[0] = min(2, shape[0].value) return partitions # Create feature columns. sc_vocab = fc.categorical_column_with_vocabulary_file( "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6) emb_vocab = fc.embedding_column( categorical_column=sc_vocab, dimension=2, # Can't use constant_initializer with load_and_remap. In practice, # use a truncated normal initializer. initializer=init_ops.random_uniform_initializer( minval=0.42, maxval=0.42)) all_deep_cols = [emb_vocab] # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = {} with variable_scope.variable_scope("", partitioner=_partitioner): # Create the variables. fc.input_layer( features=self._create_dummy_inputs(), feature_columns=all_deep_cols, cols_to_vars=cols_to_vars) ws_settings = ws_util._WarmStartSettings( self.get_temp_dir(), col_to_prev_vocab={ emb_vocab: prev_vocab_path }) ws_util._warmstart_input_layer(cols_to_vars, ws_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. Var corresponding to # emb_vocab should be correctly warmstarted after vocab remapping. # Missing values are filled in with the EmbeddingColumn's initializer. self._assert_cols_to_vars( cols_to_vars, { emb_vocab: [ np.array([[3., 3.3], [2., 2.2], [1., 1.1]]), np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]]) ] }, sess)
def testWarmStartInputLayerMoreSettings(self): # Create old and new vocabs for sparse column "sc_vocab". prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"], "old_vocab") new_vocab_path = self._write_vocab( ["orange", "guava", "banana", "apple", "raspberry", "blueberry"], "new_vocab") # Create feature columns. sc_hash = fc.categorical_column_with_hash_bucket( "sc_hash", hash_bucket_size=15) sc_keys = fc.categorical_column_with_vocabulary_list( "sc_keys", vocabulary_list=["a", "b", "c", "e"]) sc_vocab = fc.categorical_column_with_vocabulary_file( "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6) all_linear_cols = [sc_hash, sc_keys, sc_vocab] # Save checkpoint from which to warm-start. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: _ = variable_scope.get_variable( "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms()) sc_keys_weights = variable_scope.get_variable( "some_other_name", shape=[4, 1], initializer=rand()) _ = variable_scope.get_variable( "linear_model/sc_vocab/weights", initializer=[[0.5], [1.], [2.], [3.]]) self._write_checkpoint(sess) prev_keys_val = sess.run(sc_keys_weights) def _partitioner(shape, dtype): # pylint:disable=unused-argument # Partition each var into 2 equal slices. partitions = [1] * len(shape) partitions[0] = min(2, shape[0].value) return partitions # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model(all_linear_cols, _partitioner) ws_settings = ws_util._WarmStartSettings( self.get_temp_dir(), col_to_prev_vocab={sc_vocab: prev_vocab_path}, col_to_prev_tensor={sc_keys: "some_other_name"}, exclude_columns=[sc_hash]) ws_util._warmstart_input_layer(cols_to_vars, ws_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. Var corresponding to # sc_hash should not be warm-started. Var corresponding to sc_vocab # should be correctly warmstarted after vocab remapping. self._assert_cols_to_vars(cols_to_vars, { sc_keys: np.split(prev_keys_val, 2), sc_hash: [np.zeros([8, 1]), np.zeros([7, 1])], sc_vocab: [ np.array([[3.], [2.], [1.]]), np.array([[0.5], [0.], [0.]]) ] }, sess)
def testWarmStartInputLayerEmbeddingColumn(self): # Create old and new vocabs for embedding column "sc_vocab". prev_vocab_path = self._write_vocab( ["apple", "banana", "guava", "orange"], "old_vocab") new_vocab_path = self._write_vocab( ["orange", "guava", "banana", "apple", "raspberry", "blueberry"], "new_vocab") # Save checkpoint from which to warm-start. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: _ = variable_scope.get_variable( "input_layer/sc_vocab_embedding/embedding_weights", initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]]) self._write_checkpoint(sess) def _partitioner(shape, dtype): # pylint:disable=unused-argument # Partition each var into 2 equal slices. partitions = [1] * len(shape) partitions[0] = min(2, shape[0].value) return partitions # Create feature columns. sc_vocab = fc.categorical_column_with_vocabulary_file( "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6) emb_vocab = fc.embedding_column( categorical_column=sc_vocab, dimension=2, # Can't use constant_initializer with load_and_remap. In practice, # use a truncated normal initializer. initializer=init_ops.random_uniform_initializer(minval=0.42, maxval=0.42)) all_deep_cols = [emb_vocab] # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = {} with variable_scope.variable_scope("", partitioner=_partitioner): # Create the variables. fc.input_layer(features=self._create_dummy_inputs(), feature_columns=all_deep_cols, cols_to_vars=cols_to_vars) ws_settings = ws_util._WarmStartSettings( self.get_temp_dir(), col_to_prev_vocab={emb_vocab: prev_vocab_path}) ws_util._warmstart_input_layer(cols_to_vars, ws_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. Var corresponding to # emb_vocab should be correctly warmstarted after vocab remapping. # Missing values are filled in with the EmbeddingColumn's initializer. self._assert_cols_to_vars( cols_to_vars, { emb_vocab: [ np.array([[3., 3.3], [2., 2.2], [1., 1.1]]), np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]]) ] }, sess)
def testWarmStart_SparseColumnVocabularyConstrainedVocabSizes(self): # Create old vocabulary, and use a size smaller than the total number of # entries. old_vocab_path = self._write_vocab(["apple", "guava", "banana"], "old_vocab") old_vocab_size = 2 # ['apple', 'guava'] # Create new vocab for sparse column "sc_vocab". current_vocab_path = self._write_vocab( ["apple", "banana", "guava", "orange"], "current_vocab") # Create feature column. Only use 2 of the actual entries, resulting in # ['apple', 'banana'] for the new vocabulary. sc_vocab = fc.categorical_column_with_vocabulary_file( "sc_vocab", vocabulary_file=current_vocab_path, vocabulary_size=2) # Save checkpoint from which to warm-start. self._create_prev_run_var("linear_model/sc_vocab/weights", shape=[2, 1], initializer=ones()) partitioner = lambda shape, dtype: [1] * len(shape) # New graph, new session WITHOUT warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_vocab], partitioner) sess.run(variables.global_variables_initializer()) # Without warmstarting, the weights should be initialized using default # initializer (which is init_ops.zeros_initializer). self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [np.zeros([2, 1])]}, sess) # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_vocab], partitioner) vocab_info = ws_util._VocabInfo( new_vocab=sc_vocab.vocabulary_file, new_vocab_size=sc_vocab.vocabulary_size, num_oov_buckets=sc_vocab.num_oov_buckets, old_vocab=old_vocab_path, old_vocab_size=old_vocab_size) warmstart_settings = ws_util._WarmStartSettings( ckpt_to_initialize_from=self.get_temp_dir(), vars_to_warmstart=".*sc_vocab.*", var_name_to_vocab_info={ "linear_model/sc_vocab/weights": vocab_info }) ws_util._warmstart(warmstart_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. 'banana' isn't in the # first two entries of the old vocabulary, so it's newly initialized. self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [[[1], [0]]]}, sess)
def testWarmStart_SparseColumnVocabularyConstrainedVocabSizes(self): # Create old vocabulary, and use a size smaller than the total number of # entries. old_vocab_path = self._write_vocab(["apple", "guava", "banana"], "old_vocab") old_vocab_size = 2 # ['apple', 'guava'] # Create new vocab for sparse column "sc_vocab". current_vocab_path = self._write_vocab( ["apple", "banana", "guava", "orange"], "current_vocab") # Create feature column. Only use 2 of the actual entries, resulting in # ['apple', 'banana'] for the new vocabulary. sc_vocab = fc.categorical_column_with_vocabulary_file( "sc_vocab", vocabulary_file=current_vocab_path, vocabulary_size=2) # Save checkpoint from which to warm-start. self._create_prev_run_var( "linear_model/sc_vocab/weights", shape=[2, 1], initializer=ones()) partitioner = lambda shape, dtype: [1] * len(shape) # New graph, new session WITHOUT warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_vocab], partitioner) sess.run(variables.global_variables_initializer()) # Without warmstarting, the weights should be initialized using default # initializer (which is init_ops.zeros_initializer). self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [np.zeros([2, 1])]}, sess) # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_vocab], partitioner) vocab_info = ws_util._VocabInfo( new_vocab=sc_vocab.vocabulary_file, new_vocab_size=sc_vocab.vocabulary_size, num_oov_buckets=sc_vocab.num_oov_buckets, old_vocab=old_vocab_path, old_vocab_size=old_vocab_size ) warmstart_settings = ws_util._WarmStartSettings( ckpt_to_initialize_from=self.get_temp_dir(), vars_to_warmstart=".*sc_vocab.*", var_name_to_vocab_info={ "linear_model/sc_vocab/weights": vocab_info }) ws_util._warmstart(warmstart_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. 'banana' isn't in the # first two entries of the old vocabulary, so it's newly initialized. self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [[[1], [0]]]}, sess)
def testErrorConditions(self): self.assertRaises(ValueError, ws_util._WarmStartSettings, None) x = variable_scope.get_variable( "x", shape=[4, 1], initializer=ones(), partitioner=lambda shape, dtype: [2, 1]) # List of PartitionedVariable is invalid type when warmstarting with vocab. self.assertRaises(TypeError, ws_util._warmstart_var_with_vocab, [x], "/tmp", 5, "/tmp", "/tmp") # Keys of type other than FeatureColumn. self.assertRaises(TypeError, ws_util._warmstart_input_layer, {"StringType": x}, ws_util._WarmStartSettings("/tmp"))
def testErrorConditions(self): self.assertRaises(ValueError, ws_util._WarmStartSettings, None) x = variable_scope.get_variable( "x", shape=[4, 1], initializer=ones(), partitioner=lambda shape, dtype: [2, 1]) # List of PartitionedVariable is invalid type when warmstarting with vocab. self.assertRaises(TypeError, ws_util._warmstart_var_with_vocab, [x], "/tmp", 5, "/tmp", "/tmp") # Keys of type other than FeatureColumn. self.assertRaises(TypeError, ws_util._warmstart, {"StringType": x}, ws_util._WarmStartSettings("/tmp"))
def testWarmStartInputLayer_SparseColumnVocabulary(self): # Create vocab for sparse column "sc_vocab". vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"], "vocab") # Create feature column. sc_vocab = fc.categorical_column_with_vocabulary_file( "sc_vocab", vocabulary_file=vocab_path, vocabulary_size=4) # Save checkpoint from which to warm-start. _, prev_vocab_val = self._create_prev_run_var( "linear_model/sc_vocab/weights", shape=[4, 1], initializer=ones()) partitioner = lambda shape, dtype: [1] * len(shape) # New graph, new session WITHOUT warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_vocab], partitioner) sess.run(variables.global_variables_initializer()) # Without warmstarting, the weights should be initialized using default # initializer (which is init_ops.zeros_initializer). self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [np.zeros([4, 1])]}, sess) # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_vocab], partitioner) # Since old vocab is not explicitly set in WarmStartSettings, the old # vocab is assumed to be same as new vocab. ws_util._warmstart_input_layer( cols_to_vars, ws_util._WarmStartSettings(self.get_temp_dir())) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [prev_vocab_val]}, sess)
def testWarmStart_ExplicitCheckpointFile(self): # Create vocab for sparse column "sc_vocab". vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"], "vocab") # Create feature column. sc_vocab = fc.categorical_column_with_vocabulary_file( "sc_vocab", vocabulary_file=vocab_path, vocabulary_size=4) # Save checkpoint from which to warm-start. _, prev_vocab_val = self._create_prev_run_var( "linear_model/sc_vocab/weights", shape=[4, 1], initializer=ones()) partitioner = lambda shape, dtype: [1] * len(shape) # New graph, new session WITHOUT warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_vocab], partitioner) sess.run(variables.global_variables_initializer()) # Without warmstarting, the weights should be initialized using default # initializer (which is init_ops.zeros_initializer). self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [np.zeros([4, 1])]}, sess) # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_vocab], partitioner) # Since old vocab is not explicitly set in WarmStartSettings, the old # vocab is assumed to be same as new vocab. ws_util._warmstart(ws_util._WarmStartSettings( # Explicitly provide the file prefix instead of just the dir. os.path.join(self.get_temp_dir(), "model-0"), vars_to_warmstart=".*sc_vocab.*")) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [prev_vocab_val]}, sess)
def testWarmStartVarsToWarmstartIsNone(self): # Create old and new vocabs for sparse column "sc_vocab". prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"], "old_vocab") new_vocab_path = self._write_vocab( ["orange", "guava", "banana", "apple", "raspberry", "blueberry"], "new_vocab") # Create feature columns. sc_hash = fc.categorical_column_with_hash_bucket( "sc_hash", hash_bucket_size=15) sc_keys = fc.categorical_column_with_vocabulary_list( "sc_keys", vocabulary_list=["a", "b", "c", "e"]) sc_vocab = fc.categorical_column_with_vocabulary_file( "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6) all_linear_cols = [sc_hash, sc_keys, sc_vocab] # Save checkpoint from which to warm-start. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: variable_scope.get_variable( "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms()) variable_scope.get_variable( "some_other_name", shape=[4, 1], initializer=rand()) variable_scope.get_variable( "linear_model/sc_vocab/weights", initializer=[[0.5], [1.], [2.], [3.]]) self._write_checkpoint(sess) def _partitioner(shape, dtype): # pylint:disable=unused-argument # Partition each var into 2 equal slices. partitions = [1] * len(shape) partitions[0] = min(2, shape[0].value) return partitions # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model(all_linear_cols, _partitioner) vocab_info = ws_util._VocabInfo( new_vocab=sc_vocab.vocabulary_file, new_vocab_size=sc_vocab.vocabulary_size, num_oov_buckets=sc_vocab.num_oov_buckets, old_vocab=prev_vocab_path ) ws_settings = ws_util._WarmStartSettings( self.get_temp_dir(), # The special value of None here will ensure that only the variable # specified in var_name_to_vocab_info (sc_vocab embedding) is # warmstarted. vars_to_warmstart=None, var_name_to_vocab_info={ ws_util._infer_var_name(cols_to_vars[sc_vocab]): vocab_info }, # Even though this is provided, the None value for vars_to_warmstart # overrides the logic, and this will not be warmstarted. var_name_to_prev_var_name={ ws_util._infer_var_name(cols_to_vars[sc_keys]): "some_other_name" }) ws_util._warmstart(ws_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. Var corresponding to # sc_vocab should be correctly warmstarted after vocab remapping, # and neither of the other two should be warmstarted.. self._assert_cols_to_vars(cols_to_vars, { sc_keys: [np.zeros([2, 1]), np.zeros([2, 1])], sc_hash: [np.zeros([8, 1]), np.zeros([7, 1])], sc_vocab: [ np.array([[3.], [2.], [1.]]), np.array([[0.5], [0.], [0.]]) ] }, sess)
def testWarmStart_MultipleCols(self): # Create vocab for sparse column "sc_vocab". vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"], "vocab") # Create feature columns. sc_int = fc.categorical_column_with_identity("sc_int", num_buckets=10) sc_hash = fc.categorical_column_with_hash_bucket( "sc_hash", hash_bucket_size=15) sc_keys = fc.categorical_column_with_vocabulary_list( "sc_keys", vocabulary_list=["a", "b", "c", "e"]) sc_vocab = fc.categorical_column_with_vocabulary_file( "sc_vocab", vocabulary_file=vocab_path, vocabulary_size=4) real = fc.numeric_column("real") real_bucket = fc.bucketized_column(real, boundaries=[0., 1., 2., 3.]) cross = fc.crossed_column([sc_keys, sc_vocab], hash_bucket_size=20) all_linear_cols = [sc_int, sc_hash, sc_keys, sc_vocab, real_bucket, cross] # Save checkpoint from which to warm-start. Also create a bias variable, # so we can check that it's also warmstarted. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: sc_int_weights = variable_scope.get_variable( "linear_model/sc_int/weights", shape=[10, 1], initializer=ones()) sc_hash_weights = variable_scope.get_variable( "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms()) sc_keys_weights = variable_scope.get_variable( "linear_model/sc_keys/weights", shape=[4, 1], initializer=rand()) sc_vocab_weights = variable_scope.get_variable( "linear_model/sc_vocab/weights", shape=[4, 1], initializer=ones()) real_bucket_weights = variable_scope.get_variable( "linear_model/real_bucketized/weights", shape=[5, 1], initializer=norms()) cross_weights = variable_scope.get_variable( "linear_model/sc_keys_X_sc_vocab/weights", shape=[20, 1], initializer=rand()) bias = variable_scope.get_variable( "linear_model/bias_weights", shape=[1], initializer=rand()) self._write_checkpoint(sess) (prev_int_val, prev_hash_val, prev_keys_val, prev_vocab_val, prev_bucket_val, prev_cross_val, prev_bias_val) = sess.run([ sc_int_weights, sc_hash_weights, sc_keys_weights, sc_vocab_weights, real_bucket_weights, cross_weights, bias ]) partitioner = lambda shape, dtype: [1] * len(shape) # New graph, new session WITHOUT warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model(all_linear_cols, partitioner) sess.run(variables.global_variables_initializer()) # Without warmstarting, all weights should be initialized using default # initializer (which is init_ops.zeros_initializer). self._assert_cols_to_vars(cols_to_vars, { sc_int: [np.zeros([10, 1])], sc_hash: [np.zeros([15, 1])], sc_keys: [np.zeros([4, 1])], sc_vocab: [np.zeros([4, 1])], real_bucket: [np.zeros([5, 1])], cross: [np.zeros([20, 1])], }, sess) # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model(all_linear_cols, partitioner) vocab_info = ws_util._VocabInfo( new_vocab=sc_vocab.vocabulary_file, new_vocab_size=sc_vocab.vocabulary_size, num_oov_buckets=sc_vocab.num_oov_buckets, old_vocab=vocab_path ) ws_util._warmstart( ws_util._WarmStartSettings( self.get_temp_dir(), var_name_to_vocab_info={ "linear_model/sc_vocab/weights": vocab_info })) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. self._assert_cols_to_vars(cols_to_vars, { sc_int: [prev_int_val], sc_hash: [prev_hash_val], sc_keys: [prev_keys_val], sc_vocab: [prev_vocab_val], real_bucket: [prev_bucket_val], cross: [prev_cross_val], "bias": [prev_bias_val], }, sess)
def testWarmStartInputLayerMoreSettings(self): # Create old and new vocabs for sparse column "sc_vocab". prev_vocab_path = self._write_vocab( ["apple", "banana", "guava", "orange"], "old_vocab") new_vocab_path = self._write_vocab( ["orange", "guava", "banana", "apple", "raspberry", "blueberry"], "new_vocab") # Create feature columns. sc_hash = fc.categorical_column_with_hash_bucket("sc_hash", hash_bucket_size=15) sc_keys = fc.categorical_column_with_vocabulary_list( "sc_keys", vocabulary_list=["a", "b", "c", "e"]) sc_vocab = fc.categorical_column_with_vocabulary_file( "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6) all_linear_cols = [sc_hash, sc_keys, sc_vocab] # Save checkpoint from which to warm-start. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: _ = variable_scope.get_variable("linear_model/sc_hash/weights", shape=[15, 1], initializer=norms()) sc_keys_weights = variable_scope.get_variable( "some_other_name", shape=[4, 1], initializer=rand()) _ = variable_scope.get_variable( "linear_model/sc_vocab/weights", initializer=[[0.5], [1.], [2.], [3.]]) self._write_checkpoint(sess) prev_keys_val = sess.run(sc_keys_weights) def _partitioner(shape, dtype): # pylint:disable=unused-argument # Partition each var into 2 equal slices. partitions = [1] * len(shape) partitions[0] = min(2, shape[0].value) return partitions # New graph, new session with warmstarting. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model( all_linear_cols, _partitioner) ws_settings = ws_util._WarmStartSettings( self.get_temp_dir(), col_to_prev_vocab={sc_vocab: prev_vocab_path}, col_to_prev_tensor={sc_keys: "some_other_name"}, exclude_columns=[sc_hash]) ws_util._warmstart_input_layer(cols_to_vars, ws_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warmstarted. Var corresponding to # sc_hash should not be warm-started. Var corresponding to sc_vocab # should be correctly warmstarted after vocab remapping. self._assert_cols_to_vars( cols_to_vars, { sc_keys: np.split(prev_keys_val, 2), sc_hash: [np.zeros([8, 1]), np.zeros([7, 1])], sc_vocab: [ np.array([[3.], [2.], [1.]]), np.array([[0.5], [0.], [0.]]) ] }, sess)