def get_tpu_system_metadata(tpu_cluster_resolver):
  """Queries TPU system metadata for the cluster behind a resolver.

  Args:
    tpu_cluster_resolver: Resolver object exposing `master()` and
      `cluster_spec()`.

  Returns:
    The value returned by `_query_tpu_system_metadata` for the resolver's
    master, queried with `query_topology=True`.
  """
  master = tpu_cluster_resolver.master()

  resolved_spec = tpu_cluster_resolver.cluster_spec()
  if not resolved_spec:
    # The resolver reported no (or an empty) cluster spec; fall back to an
    # empty `ClusterSpec` so that a cluster def is always passed along.
    resolved_spec = server_lib.ClusterSpec({})
  cluster_def = resolved_spec.as_cluster_def()

  # pylint: disable=protected-access
  return tpu_system_metadata_lib._query_tpu_system_metadata(
      master, cluster_def=cluster_def, query_topology=True)
def get_tpu_system_metadata(tpu_cluster_resolver):
  """Queries TPU system metadata for the cluster behind a resolver.

  Args:
    tpu_cluster_resolver: Resolver object exposing `master()` and
      `cluster_spec()`.

  Returns:
    The value returned by `_query_tpu_system_metadata` for the resolver's
    master, queried with `query_topology=False`.
  """
  master = tpu_cluster_resolver.master()

  # Only build a cluster def when the resolver reports a non-empty spec;
  # otherwise `None` is forwarded.
  cluster_def = None
  resolved_spec = tpu_cluster_resolver.cluster_spec()
  if resolved_spec:
    cluster_def = resolved_spec.as_cluster_def()

  # pylint: disable=protected-access
  return tpu_system_metadata_lib._query_tpu_system_metadata(
      master, cluster_def=cluster_def, query_topology=False)
def get_tpu_system_metadata(tpu_cluster_resolver):
  """Looks up the TPU system metadata reachable through a cluster resolver.

  Args:
    tpu_cluster_resolver: Resolver object exposing `master()` and
      `cluster_spec()`.

  Returns:
    The value returned by `_query_tpu_system_metadata` for the resolver's
    master. The query is issued with `query_topology=False`.
  """
  master_address = tpu_cluster_resolver.master()
  spec = tpu_cluster_resolver.cluster_spec()

  if spec:
    cluster_def = spec.as_cluster_def()
  else:
    # Missing or empty cluster spec: pass no cluster def to the query.
    cluster_def = None

  # pylint: disable=protected-access
  metadata = tpu_system_metadata_lib._query_tpu_system_metadata(
      master_address, cluster_def=cluster_def, query_topology=False)
  return metadata
def get_tpu_system_metadata(tpu_cluster_resolver):
  """Looks up the TPU system metadata reachable through a cluster resolver.

  Args:
    tpu_cluster_resolver: Resolver object exposing `master()` and
      `cluster_spec()`.

  Returns:
    The value returned by `_query_tpu_system_metadata` for the resolver's
    master. The query is issued with `query_topology=True`.
  """
  master_address = tpu_cluster_resolver.master()
  spec = tpu_cluster_resolver.cluster_spec()

  if spec:
    cluster_def = spec.as_cluster_def()
  else:
    # Missing or empty cluster spec: substitute an empty `ClusterSpec` so the
    # query still receives a cluster def.
    cluster_def = server_lib.ClusterSpec({}).as_cluster_def()

  # pylint: disable=protected-access
  metadata = tpu_system_metadata_lib._query_tpu_system_metadata(
      master_address, cluster_def=cluster_def, query_topology=True)
  return metadata
def _get_tpu_system_metadata(self):
  """Returns the TPU system metadata for the current master, using a cache.

  The metadata is looked up in `self._lazy_tpu_system_metadata_dict`, keyed by
  the master address. On a miss it is queried and stored under that key.

  Returns:
    The cached or freshly queried TPU system metadata.
  """
  master_address = self._get_master_address()

  metadata = self._lazy_tpu_system_metadata_dict.get(master_address)
  if metadata is None:
    # Cache miss: query the master. Topology is requested only when model
    # parallelism is enabled.
    # pylint: disable=protected-access
    metadata = tpu_system_metadata_lib._query_tpu_system_metadata(
        master_address, query_topology=self.model_parallelism_enabled)
    self._lazy_tpu_system_metadata_dict[master_address] = metadata

  return metadata
def _get_tpu_system_metadata(self):
  """Returns the TPU system metadata for the current master, using a cache.

  The metadata is looked up in `self._lazy_tpu_system_metadata_dict`, keyed by
  the master address. On a miss it is queried and stored under that key. The
  session config's cluster def is forwarded to the query when it lists jobs.

  Returns:
    The cached or freshly queried TPU system metadata.
  """
  master_address = self._get_master_address()

  metadata = self._lazy_tpu_system_metadata_dict.get(master_address)
  if metadata is None:
    # Cache miss. Forward the cluster def only if a session config is present
    # and its cluster def contains at least one job; otherwise pass `None`.
    session_config = self._config.session_config
    if session_config and session_config.cluster_def.job:
      cluster_def = session_config.cluster_def
    else:
      cluster_def = None

    # pylint: disable=protected-access
    metadata = tpu_system_metadata_lib._query_tpu_system_metadata(
        master_address,
        cluster_def=cluster_def,
        query_topology=self.model_parallelism_enabled)
    self._lazy_tpu_system_metadata_dict[master_address] = metadata

  return metadata
def __init__(self,
             table_to_config_dict,
             feature_to_table_dict,
             batch_size,
             mode,
             master,
             optimization_parameters=None):
  """Sets up TPU embedding lookups for the given tables and features.

  Args:
    table_to_config_dict: A dictionary mapping a table name (string) to its
      `TableConfig`. A table is an embedding table, e.g. the `params`
      argument to `tf.nn.embedding_lookup_sparse()`.
    feature_to_table_dict: A dictionary mapping a feature name (string) to a
      table name (string). A feature is the ids to look up in an embedding
      table, e.g. the `sp_ids` argument to
      `tf.nn.embedding_lookup_sparse()`.
    batch_size: An `int` representing the global batch size.
    mode: `TRAINING` or `INFERENCE`.
    master: A `string` representing the TensorFlow master to use.
    optimization_parameters: `AdagradParameters`, `AdamParameters` or
      `StochasticGradientDescentParameters`. Must be set in training and
      must be `None` in inference.

  Raises:
    ValueError: if any input is invalid.
  """
  _validate_table_to_config_dict(table_to_config_dict)
  # `OrderedDict` gives a deterministic iteration order, unlike `Dict`.
  ordered_tables = _create_ordered_dict(table_to_config_dict)
  self._table_to_config_dict = ordered_tables
  self._combiners = _create_combiners(ordered_tables)

  _validate_feature_to_table_dict(table_to_config_dict, feature_to_table_dict)
  ordered_features = _create_ordered_dict(feature_to_table_dict)
  self._feature_to_table_dict = ordered_features
  self._table_to_features_dict = _create_table_to_features_dict(
      ordered_features)

  self._batch_size = batch_size
  self._master = master

  # pylint: disable=protected-access
  metadata = tpu_system_metadata_lib._query_tpu_system_metadata(master)
  # pylint: enable=protected-access
  self._tpu_system_metadata = metadata
  if metadata.num_cores == 0:
    raise ValueError(
        'TPUEmbedding needs TPUs, but master {} does not have TPUs.'.format(
            master))

  self._num_hosts = metadata.num_hosts
  # Hosts are the devices whose name marks them as a CPU device.
  cpu_device_names = []
  for device in metadata.devices:
    if 'device:CPU:' in device.name:
      cpu_device_names.append(device.name)
  self._hosts = cpu_device_names
  self._num_cores_per_host = metadata.num_of_cores_per_host
  self._num_cores = metadata.num_cores

  _validate_batch_size(batch_size, metadata.num_cores)
  self._batch_size_per_core = batch_size // metadata.num_cores

  self._init_ops = []

  # TODO(shizhiw): remove `mode`?
  if mode == TRAINING:
    _validate_optimization_parameters(optimization_parameters)
    resolved_parameters = optimization_parameters
  elif mode == INFERENCE:
    if optimization_parameters is not None:
      raise ValueError(
          '`optimization_parameters` should be `None` for inference mode.')
    # Inference still needs some parameters object; use SGD with rate 1.
    resolved_parameters = StochasticGradientDescentParameters(1.)
  else:
    raise ValueError('`mode` only supports {} and {}; got {}.'.format(
        TRAINING, INFERENCE, mode))
  self._optimization_parameters = resolved_parameters
  self._mode = mode

  # TODO(shizhiw): move `optimization_parameters` into `_optimizer_handler`
  # and create special handler for inference that inherits from
  # StochasticGradientDescentHandler with more user-friendly error message
  # on get_slot().
  self._optimizer_handler = _get_optimization_handler(resolved_parameters)

  dummy_init_op = self._create_dummy_table_variables()
  self._init_ops.append(dummy_init_op)

  self._config_proto = self._create_config_proto()
def __init__(self,
             table_to_config_dict,
             feature_to_table_dict,
             batch_size,
             mode,
             master,
             optimization_parameters=None):
  """Configures TPU-backed embedding lookups.

  Args:
    table_to_config_dict: A dictionary from table name (string) to
      `TableConfig`. "Table" means an embedding table, e.g. the `params`
      argument to `tf.nn.embedding_lookup_sparse()`.
    feature_to_table_dict: A dictionary from feature name (string) to table
      name (string). "Feature" means the ids to look up in an embedding
      table, e.g. the `sp_ids` argument to
      `tf.nn.embedding_lookup_sparse()`.
    batch_size: An `int` representing the global batch size.
    mode: `TRAINING` or `INFERENCE`.
    master: A `string` representing the TensorFlow master to use.
    optimization_parameters: `AdagradParameters`, `AdamParameters` or
      `StochasticGradientDescentParameters`. Must be set in training and
      must be `None` in inference.

  Raises:
    ValueError: if any input is invalid.
  """
  # Tables. An `OrderedDict` avoids nondeterministic `Dict` iteration order.
  _validate_table_to_config_dict(table_to_config_dict)
  tables = _create_ordered_dict(table_to_config_dict)
  self._table_to_config_dict = tables
  self._combiners = _create_combiners(tables)

  # Features, plus the inverse table -> features mapping.
  _validate_feature_to_table_dict(table_to_config_dict, feature_to_table_dict)
  features = _create_ordered_dict(feature_to_table_dict)
  self._feature_to_table_dict = features
  self._table_to_features_dict = _create_table_to_features_dict(features)

  self._batch_size = batch_size
  self._master = master

  # TPU system layout as reported by the master.
  system_metadata = tpu_system_metadata_lib._query_tpu_system_metadata(  # pylint: disable=protected-access
      master)
  self._tpu_system_metadata = system_metadata
  num_cores = system_metadata.num_cores
  if num_cores == 0:
    no_tpu_message = (
        'TPUEmbedding needs TPUs, but master {} does not have TPUs.')
    raise ValueError(no_tpu_message.format(master))

  self._num_hosts = system_metadata.num_hosts
  device_names = (device.name for device in system_metadata.devices)
  self._hosts = [name for name in device_names if 'device:CPU:' in name]
  self._num_cores_per_host = system_metadata.num_of_cores_per_host
  self._num_cores = system_metadata.num_cores

  _validate_batch_size(batch_size, self._num_cores)
  self._batch_size_per_core = batch_size // self._num_cores

  self._init_ops = []

  # TODO(shizhiw): remove `mode`?
  if mode == TRAINING:
    _validate_optimization_parameters(optimization_parameters)
    self._optimization_parameters = optimization_parameters
  elif mode == INFERENCE:
    if optimization_parameters is not None:
      raise ValueError(
          '`optimization_parameters` should be `None` for inference mode.')
    # No optimizer is supplied for inference; SGD with rate 1 is used as the
    # parameters object.
    self._optimization_parameters = StochasticGradientDescentParameters(1.)
  else:
    unsupported_mode_message = '`mode` only supports {} and {}; got {}.'
    raise ValueError(
        unsupported_mode_message.format(TRAINING, INFERENCE, mode))
  self._mode = mode

  # TODO(shizhiw): move `optimization_parameters` into `_optimizer_handler`
  # and create special handler for inference that inherits from
  # StochasticGradientDescentHandler with more user-friendly error message
  # on get_slot().
  self._optimizer_handler = _get_optimization_handler(
      self._optimization_parameters)

  table_variables_init_op = self._create_dummy_table_variables()
  self._init_ops.append(table_variables_init_op)

  self._config_proto = self._create_config_proto()