def __init__(self, params):
  super().__init__(params)
  p = self.params
  assert p.vocab_size > 0
  assert p.embedding_dim > 0
  assert p.input_keys
  assert p.name
  assert p.num_tpu_hosts > 0
  if p.combiner is None:
    assert p.max_sequence_length
  if p.max_sequence_length is not None and p.max_sequence_length > 0:
    assert p.combiner is None

  # Pad the vocab so that it divides evenly into per-host shards.
  self._ids_per_shard = int(math.ceil(float(p.vocab_size) / p.num_tpu_hosts))
  self._padded_vocab_size = self._ids_per_shard * p.num_tpu_hosts
  self._input_keys = p.input_keys

  self._max_sequence_length = 0
  if p.max_sequence_length:
    self._max_sequence_length = p.max_sequence_length

  self.CreateChild('optimizer', p.optimizer)
  self._table_name = '{}_table'.format(p.name)
  self._table_config = tpu_embedding_lib.TableConfig(
      self._padded_vocab_size,
      p.embedding_dim,
      combiner=p.combiner,
      optimization_parameters=self.optimizer
      .tpu_embedding_optimizer_parameters)

  # Populated later with the ops that load/retrieve the embedding shards.
  self._load_op_list = []
  self._retrieve_op_list = []
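# A minimal standalone sketch (names illustrative, not part of the class
# above) of the vocab padding arithmetic: each of the num_tpu_hosts shards
# must hold the same number of ids, so the vocab is rounded up to the next
# multiple of the host count.
import math

def _padded_vocab_sketch(vocab_size, num_tpu_hosts):
  ids_per_shard = int(math.ceil(float(vocab_size) / num_tpu_hosts))
  return ids_per_shard * num_tpu_hosts

assert _padded_vocab_sketch(vocab_size=10, num_tpu_hosts=4) == 12  # 3 ids/shard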
def get_configs_from_feature_columns(feature_columns):
  """Create configs for TPUEmbedding etc from a list of feature columns.

  Args:
    feature_columns: a list of supported feature columns.

  Returns:
    A tuple of dicts, the first maps tables to their config, the second maps
    features to their config, the third maps learning rate key to callback
    that takes global step and outputs dynamic learning rate.
  """
  allowed = (
      tpu_fc_v2._TPUEmbeddingColumnV2,  # pylint: disable=protected-access
      tpu_fc_v2._TPUSharedEmbeddingColumnV2)  # pylint: disable=protected-access
  warn = (tpu_fc._TPUEmbeddingColumn,
          tpu_fc._TPUSharedEmbeddingColumn)  # pylint: disable=protected-access

  for column in feature_columns:
    if not isinstance(column, allowed + warn):
      raise TypeError(
          'Unsupported feature column {}. Supported types are {}.'.format(
              type(column), allowed))
    if isinstance(column, warn):
      logging.warn(
          'Columns of type {} are deprecated. Supported types are {}.'.format(
              type(column), allowed))

  table_to_config = {}
  feature_to_config = {}
  lr_key_to_fn, lr_fn_to_key = get_lr(feature_columns)
  lr_fn_to_key[None] = None
  for column in feature_columns:
    feature_name = column.get_feature_key_name()
    table_name = _get_table_name_from_embedding_var_name(
        column.get_embedding_var_name())
    if feature_name in feature_to_config:
      raise ValueError(
          'Feature column {} is used with multiple embeddings and this is '
          'not supported.'.format(feature_name))
    feature_to_config[feature_name] = tpu_embedding.FeatureConfig(
        table_id=table_name,
        max_sequence_length=column.get_max_sequence_length(),
        weight_key=column.get_weight_key_name())
    vocabulary_size, dimension = column.get_embedding_table_size()
    table_to_config[table_name] = tpu_embedding.TableConfig(
        vocabulary_size=vocabulary_size,
        dimension=dimension,
        initializer=column.get_initializer(),
        combiner=column.get_combiner(),
        learning_rate_key=lr_fn_to_key[column.get_learning_rate_fn()])

  return table_to_config, feature_to_config, lr_key_to_fn
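# Hedged sketch of the learning-rate bookkeeping used above: columns sharing
# the same Python callable should resolve to one learning-rate key, and
# columns with no callable map to no key (hence `lr_fn_to_key[None] = None`).
# `_make_lr_maps_sketch` is a hypothetical stand-in for the `get_lr` helper,
# shown only to illustrate the two-way mapping, not its actual implementation.
def _make_lr_maps_sketch(lr_fns):
  lr_key_to_fn = {}
  lr_fn_to_key = {}
  for fn in lr_fns:
    if fn is not None and fn not in lr_fn_to_key:
      key = 'lr_key_{}'.format(len(lr_key_to_fn))
      lr_key_to_fn[key] = fn
      lr_fn_to_key[fn] = key
  lr_fn_to_key[None] = None  # columns without a dynamic lr get no key
  return lr_key_to_fn, lr_fn_to_key

_fn = lambda step: 0.1
_keys, _fn_to_key = _make_lr_maps_sketch([_fn, _fn, None])
assert _fn_to_key[_fn] == 'lr_key_0' and _fn_to_key[None] is None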
def __init__(self, params):
  super().__init__(params)
  p = self.params
  assert p.vocab_size > 0
  assert p.embedding_dim > 0
  assert p.input_keys
  assert p.name
  assert p.num_tpu_hosts > 0
  if p.combiner is None:
    assert p.max_sequence_length
  if p.max_sequence_length is not None and p.max_sequence_length > 0:
    assert p.combiner is None

  self._ids_per_shard = int(math.ceil(float(p.vocab_size) / p.num_tpu_hosts))
  self._padded_vocab_size = self._ids_per_shard * p.num_tpu_hosts
  self._input_keys = p.input_keys

  self._max_sequence_length = 0
  if p.max_sequence_length:
    self._max_sequence_length = p.max_sequence_length

  self.CreateChild('optimizer', p.optimizer)

  def GetLearningRateFn():
    if p.lr_schedule is None:
      return None
    else:
      self.CreateChild('schedule', p.lr_schedule)

      def LearningRateFn(step):
        lr = self.schedule.Value(step)
        _AddTpuEmbeddingSummaryTensor(
            'tpu_embedding_lr/{}'.format(p.name), lr)
        return lr

      return LearningRateFn

  self._table_name = '{}_table'.format(p.name)
  self._table_config = tpu_embedding_lib.TableConfig(
      self._padded_vocab_size,
      p.embedding_dim,
      combiner=p.combiner,
      learning_rate=p.learning_rate,
      learning_rate_fn=GetLearningRateFn(),
      optimization_parameters=self.optimizer
      .tpu_embedding_optimizer_parameters)

  self._load_op_list = []
  self._retrieve_op_list = []
def __init__(self, params):
  super().__init__(params)
  p = self.params
  assert p.vocab_size > 0
  assert p.embedding_dim > 0
  assert p.input_keys
  assert p.name
  assert p.num_tpu_hosts > 0
  if p.combiner is None:
    assert p.max_sequence_length
  if p.max_sequence_length is not None and p.max_sequence_length > 0:
    assert p.combiner is None
  assert p.optimizer
  assert p.learning_rate
  assert p.lr_schedule

  self._ids_per_shard = int(math.ceil(float(p.vocab_size) / p.num_tpu_hosts))
  self._padded_vocab_size = self._ids_per_shard * p.num_tpu_hosts
  self._input_keys = p.input_keys

  self._max_sequence_length = 0
  if p.max_sequence_length:
    self._max_sequence_length = p.max_sequence_length

  self.CreateChild('optimizer', p.optimizer)
  self.CreateChild('schedule', p.lr_schedule)
  self._tpu_embedding_collection = TpuEmbeddingCollection.Get()

  def LearningRateFn(step):
    with py_utils.GlobalStepContext(step):
      lr = self.schedule.Value() * p.learning_rate
    self._tpu_embedding_collection.AddSummaryTensor(
        'tpu_embedding_lr/{}'.format(p.name), lr)
    return lr

  self._table_name = '{}_table'.format(p.name)
  self._table_config = tpu_embedding_lib.TableConfig(
      self._padded_vocab_size,
      p.embedding_dim,
      combiner=p.combiner,
      learning_rate=None,
      learning_rate_fn=LearningRateFn,
      # All TableConfigs passed to API will have a learning rate function,
      # so the learning_rate in the optimization_parameters is not used.
      optimization_parameters=self.optimizer.CreateOptimizerParameters(
          p.learning_rate))

  self._load_op_list = []
  self._retrieve_op_list = []
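# Minimal sketch of the closure pattern behind LearningRateFn above: the TPU
# embedding library invokes the callback with the global step and uses the
# returned scalar as the table's learning rate. The schedule and scaling
# below are illustrative stand-ins, not lingvo's actual schedule classes.
def _make_learning_rate_fn_sketch(base_lr, schedule_value):
  def learning_rate_fn(step):
    return schedule_value(step) * base_lr
  return learning_rate_fn

_lr_fn = _make_learning_rate_fn_sketch(
    0.1, lambda step: 0.5 if step > 100 else 1.0)
assert _lr_fn(0) == 0.1 and _lr_fn(200) == 0.05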
def get_configs_from_feature_columns(feature_columns):
  """Create configs for TPUEmbedding etc from a list of feature columns.

  Args:
    feature_columns: a list of supported feature columns.

  Returns:
    A tuple of dicts, the first maps tables to their config, the second maps
    features to their config, and the third maps features to weight key
    names.
  """
  allowed = (tpu_fc._TPUEmbeddingColumn,
             tpu_fc._TPUSharedEmbeddingColumn)  # pylint: disable=protected-access

  for column in feature_columns:
    if not isinstance(column, allowed):
      raise TypeError(
          'Unsupported feature column {}. Supported types are {}.'.format(
              type(column), allowed))

  table_to_config = {}
  feature_to_config = {}
  feature_to_weight_key_name = {}
  for column in feature_columns:
    feature_name = column.get_feature_key_name()
    table_name = _get_table_name_from_embedding_var_name(
        column.get_embedding_var_name())
    if feature_name in feature_to_config:
      raise ValueError(
          'Feature column {} is used with multiple embeddings and this is '
          'not supported.'.format(feature_name))
    feature_to_config[feature_name] = tpu_embedding.FeatureConfig(
        table_id=table_name,
        max_sequence_length=column.get_max_sequence_length())
    feature_to_weight_key_name[feature_name] = column.get_weight_key_name()
    vocabulary_size, dimension = column.get_embedding_table_size()
    table_to_config[table_name] = tpu_embedding.TableConfig(
        vocabulary_size=vocabulary_size,
        dimension=dimension,
        initializer=column.get_initializer(),
        combiner=column.get_combiner())

  return table_to_config, feature_to_config, feature_to_weight_key_name
def get_tpu_embedding_config_from_feature_columns(feature_columns):
  """Create configs for TPUEmbedding from a list of feature columns.

  This function will place one embedding tensor per table and the return is
  intended to be used as input to TPUEmbedding.

  Args:
    feature_columns: a list of supported feature columns.

  Returns:
    A pair of dicts, the first maps tables to their config, the second maps
    features to tables.
  """
  allowed = (tpu_fc._TPUEmbeddingColumn,
             tpu_fc._TPUSharedEmbeddingColumn)  # pylint: disable=protected-access

  for column in feature_columns:
    if not isinstance(column, allowed):
      raise TypeError(
          'Unsupported feature column {}. Supported types are {}.'.format(
              type(column), allowed))

  table_to_config = {}
  feature_to_table = {}
  for column in feature_columns:
    feature_name = column.get_feature_key_name()
    table_name = _get_table_name_from_embedding_var_name(
        column.get_embedding_var_name())
    if feature_name in feature_to_table:
      raise ValueError(
          'Feature column {} is used with multiple embeddings and this is '
          'not supported.'.format(feature_name))
    feature_to_table[feature_name] = table_name
    vocabulary_size, dimension = column.get_embedding_table_size()
    table_to_config[table_name] = tpu_embedding.TableConfig(
        vocabulary_size=vocabulary_size,
        dimension=dimension,
        initializer=column.get_initializer(),
        combiner=column.get_combiner())

  return table_to_config, feature_to_table
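# Hedged sketch of the feature-to-table mapping built above: two features
# backed by the same embedding variable share one table entry, while reusing
# a feature name raises. This simplified helper stands in for the column
# plumbing; the real code derives table names from embedding variable names.
def _map_features_to_tables_sketch(columns):
  feature_to_table = {}
  for feature_name, table_name in columns:
    if feature_name in feature_to_table:
      raise ValueError(
          'Feature column {} is used with multiple embeddings and this is '
          'not supported.'.format(feature_name))
    feature_to_table[feature_name] = table_name
  return feature_to_table

assert _map_features_to_tables_sketch(
    [('f1', 'shared_tbl'), ('f2', 'shared_tbl')]) == {
        'f1': 'shared_tbl', 'f2': 'shared_tbl'}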
def get_feature_tbl_config(self):
  """Creates table configuration data structures.

  For all tables, vocab size and width are given by params.

  Table setup:
    tbl0 - categorical-feature-14
    tbl1 - categorical-feature-15
    ..

  Feature setup:
    categorical-feature-14 -- tbl0 (first sparse feature)
    categorical-feature-15 -- tbl1 (second sparse feature)

  Returns:
    A tuple of dicts, one for feature_to_config and one for table_to_config.
  """

  def lr_fn(global_step):
    """Learning rate function for the embeddings. Linear warmup, poly decay."""
    decay_exp = 2
    scal = self._batch_size / 2048
    adj_lr = self._learning_rate * scal
    if self._lr_warmup_steps == 0:
      return adj_lr
    if self._optimizer == "adagrad":
      return self._learning_rate
    warmup_lr = tf.cast(
        global_step, dtype=tf.float32) / self._lr_warmup_steps * adj_lr

    global_step = tf.cast(global_step, tf.float32)
    decay_steps = tf.cast(self._decay_steps, tf.float32)
    decay_start_step = tf.cast(self._decay_start_steps, tf.float32)
    steps_since_decay_start = global_step - decay_start_step
    already_decayed_steps = tf.minimum(steps_since_decay_start, decay_steps)
    decay_lr = adj_lr * (
        (decay_steps - already_decayed_steps) / decay_steps)**decay_exp
    decay_lr = tf.maximum(0.0000001, decay_lr)

    lr = tf.where(
        global_step < self._lr_warmup_steps, warmup_lr,
        tf.where(
            tf.logical_and(decay_steps > 0, global_step > decay_start_step),
            decay_lr, adj_lr))
    return lr

  table_to_config_dict = {}
  for i in range(self._num_tables_in_ec):
    vocab_size = self._table_size_sorted[i]
    table_to_config_dict["tbl%02d" % i] = tpu_embedding.TableConfig(
        vocabulary_size=vocab_size,
        dimension=self._de,
        # NOTE: Default weight initializer uses trunc_normal,
        # stddev=1/dimension. This is changed to match the mlperf
        # reference model.
        initializer=tf.random_uniform_initializer(
            minval=-1 / math.sqrt(vocab_size),
            maxval=1 / math.sqrt(vocab_size)),
        combiner=None,
        learning_rate_fn=lr_fn,
        # TODO(tayo): Using the utils lr_fn leads to problems with embedding
        # table size. The embedding table stops being able to fit.
        # learning_rate_fn=functools.partial(utils.lr_fn, params)
    )

  # Use an offset so the categorical feature numbering follows the integer
  # feature numbering.
  offset = 1 + self._num_dense_features
  feature_to_config_dict = {}
  feature_to_config_dict.update([
      ("categorical-feature-%02d" % i,
       tpu_embedding.FeatureConfig(table_id="tbl%02d" % (i - offset)))
      for i in range(offset, offset + self._num_tables_in_ec)
  ])
  return feature_to_config_dict, table_to_config_dict
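# Hedged numeric check of lr_fn's warmup/plateau/poly-decay shape, rewritten
# in plain Python (no TF, and omitting the adagrad early-return) with made-up
# hyperparameters so it runs standalone.
def _dlrm_lr_sketch(step, base_lr=1.0, batch_size=2048, warmup_steps=100,
                    decay_start=200, decay_steps=100, decay_exp=2):
  adj_lr = base_lr * batch_size / 2048
  if step < warmup_steps:
    return step / warmup_steps * adj_lr
  if decay_steps > 0 and step > decay_start:
    decayed = min(step - decay_start, decay_steps)
    return max(1e-7,
               adj_lr * ((decay_steps - decayed) / decay_steps)**decay_exp)
  return adj_lr

assert _dlrm_lr_sketch(50) == 0.5      # halfway through warmup
assert _dlrm_lr_sketch(150) == 1.0     # plateau between warmup and decay
assert _dlrm_lr_sketch(250) == 0.25    # halfway through decay: 0.5**2
assert _dlrm_lr_sketch(10000) == 1e-7  # floored after decay completes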