def __init__(self,
             hparams,
             mode,
             problem_hparams=None,
             problem_idx=0,
             data_parallelism=None,
             ps_devices=None,
             decode_hparams=None):
  """Create a T2TModel.

  Args:
    hparams: a hyperparameters object.
    mode: the execution mode, as defined in tf.estimator.ModeKeys.
    problem_hparams: a hyperparameters object for the problem; defaults to
      hparams.problems[0] when omitted.
    problem_idx: an integer index of the problem.
    data_parallelism: an expert_utils.Parallelism specifying devices for
      data parallelism; defaults to single-device parallelism.
    ps_devices: a list of devices to be used for experts.
    decode_hparams: a hyperparameter object with decoding parameters.
  """
  # Prefer the registered name; fall back to the class-derived default.
  derived_name = registry.default_name(type(self))
  super(T2TModel, self).__init__(
      trainable=mode == tf.estimator.ModeKeys.TRAIN,
      name=self.REGISTERED_NAME or derived_name)

  # Substitute single-device defaults when parallelism info is absent.
  if data_parallelism is None:
    data_parallelism = eu.Parallelism([""])
  if ps_devices is None:
    ps_devices = [""]
  if problem_hparams is None:
    problem_hparams = hparams.problems[0]

  # Work on a shallow copy so the caller's hparams are never mutated.
  hparams = copy.copy(hparams)
  if hparams.shared_embedding_and_softmax_weights:
    # Weight sharing is only valid when every problem's input and target
    # modalities agree; otherwise disable it.
    mismatched = any(
        "inputs" in problem.input_modality and
        problem.input_modality["inputs"] != problem.target_modality
        for problem in hparams.problems)
    if mismatched:
      tf.logging.info("Unsetting shared_embedding_and_softmax_weights.")
      hparams.shared_embedding_and_softmax_weights = 0

  self._original_hparams = hparams
  self.set_mode(mode)  # presumably derives self._hparams — order matters here
  self._decode_hparams = copy.copy(decode_hparams)
  self._data_parallelism = data_parallelism
  self._num_datashards = data_parallelism.n
  self._ps_devices = ps_devices
  self._problem_hparams = problem_hparams
  self._problem_idx = problem_idx
  self._create_modalities(problem_hparams, self._hparams)
  self._var_store = create_eager_var_store()
def __init__(self,
             hparams,
             mode=tf.estimator.ModeKeys.TRAIN,
             problem_hparams=None,
             data_parallelism=None,
             decode_hparams=None):
  """Create a T2TModel.

  Args:
    hparams: tf.contrib.training.HParams, model hyperparameters.
    mode: tf.estimator.ModeKeys, the execution mode.
    problem_hparams: tf.contrib.training.HParams, hyperparameters for the
      Problem. If provided here or in hparams.problems, the model will
      automatically determine bottom, top, and loss methods. If not
      provided, calling the model will only invoke body.
    data_parallelism: an expert_utils.Parallelism object, specifies devices
      for data parallelism.
    decode_hparams: a hyperparameter object with decoding parameters. See
      decoding.decode_hparams.
  """
  # The layer name is the registered model name when available, otherwise
  # a default derived from the class name.
  derived_name = registry.default_name(type(self))
  super(T2TModel, self).__init__(
      trainable=mode == tf.estimator.ModeKeys.TRAIN,
      name=self.REGISTERED_NAME or derived_name)

  # Fall back to the first problem in hparams when none is given.
  if not problem_hparams and hasattr(hparams, "problems"):
    problem_hparams = hparams.problems[0]
  self._problem_hparams = problem_hparams

  # Setup hparams on a private copy; disable embedding/softmax weight
  # sharing when the input and target vocabularies differ.
  hparams = copy.copy(hparams)
  if problem_hparams and hparams.shared_embedding_and_softmax_weights:
    input_modality = problem_hparams.input_modality
    vocabs_differ = (
        "inputs" in input_modality and
        input_modality["inputs"] != problem_hparams.target_modality)
    if vocabs_differ:
      tf.logging.info("Unsetting shared_embedding_and_softmax_weights.")
      hparams.shared_embedding_and_softmax_weights = 0
  self._original_hparams = hparams
  self.set_mode(mode)  # presumably derives self._hparams — order matters here

  self._decode_hparams = copy.copy(
      decode_hparams or decoding.decode_hparams())
  self._data_parallelism = data_parallelism or eu.Parallelism([""])
  self._num_datashards = self._data_parallelism.n
  self._ps_devices = self._data_parallelism.ps_devices
  self._eager_var_store = create_eager_var_store()
  if self._problem_hparams:
    self._create_modalities(self._problem_hparams, self._hparams)
def __init__(self,
             hparams,
             mode=tf.estimator.ModeKeys.TRAIN,
             problem_hparams=None,
             data_parallelism=None,
             decode_hparams=None):
  """Create a T2TModel.

  Args:
    hparams: tf.contrib.training.HParams, model hyperparameters.
    mode: tf.estimator.ModeKeys, the execution mode.
    problem_hparams: tf.contrib.training.HParams, hyperparameters for the
      Problem. If provided here or in hparams.problems, the model will
      automatically determine bottom, top, and loss methods. If not
      provided, calling the model will only invoke body.
    data_parallelism: an expert_utils.Parallelism object, specifies devices
      for data parallelism.
    decode_hparams: a hyperparameter object with decoding parameters. See
      decoding.decode_hparams.
  """
  # Name resolution: an explicitly registered name takes precedence over
  # the default derived from the class.
  registered = self.REGISTERED_NAME
  fallback = registry.default_name(type(self))
  super(T2TModel, self).__init__(
      trainable=mode == tf.estimator.ModeKeys.TRAIN,
      name=registered or fallback)

  # When no problem hparams are supplied, take the first configured one.
  if not problem_hparams and hasattr(hparams, "problems"):
    problem_hparams = hparams.problems[0]
  self._problem_hparams = problem_hparams

  # Setup hparams: copy first, then drop shared embedding/softmax weights
  # if the input and target vocabularies cannot share a table.
  hparams = copy.copy(hparams)
  if self._problem_hparams and hparams.shared_embedding_and_softmax_weights:
    sharing_ok = True
    modalities = self._problem_hparams.input_modality
    if "inputs" in modalities:
      if modalities["inputs"] != self._problem_hparams.target_modality:
        sharing_ok = False
    if not sharing_ok:
      log_info("Unsetting shared_embedding_and_softmax_weights.")
      hparams.shared_embedding_and_softmax_weights = 0

  self._original_hparams = hparams
  self.set_mode(mode)  # presumably derives self._hparams — order matters here

  self._decode_hparams = copy.copy(
      decode_hparams or decoding.decode_hparams())
  self._data_parallelism = data_parallelism or eu.Parallelism([""])
  self._num_datashards = self._data_parallelism.n
  self._ps_devices = self._data_parallelism.ps_devices
  self._eager_var_store = create_eager_var_store()
  if self._problem_hparams:
    self._create_modalities(self._problem_hparams, self._hparams)
def __init__(self,
             hparams,
             mode,
             problem_hparams=None,
             problem_idx=0,
             data_parallelism=None,
             ps_devices=None,
             decode_hparams=None):
  """Create a T2TModel.

  Args:
    hparams: a hyperparameters object.
    mode: the execution mode, as defined in tf.estimator.ModeKeys.
    problem_hparams: a hyperparameters object for the problem; defaults to
      hparams.problems[0] when omitted.
    problem_idx: an integer index of the problem.
    data_parallelism: an expert_utils.Parallelism specifying devices for
      data parallelism; defaults to single-device parallelism.
    ps_devices: a list of devices to be used for experts.
    decode_hparams: a hyperparameter object with decoding parameters.
  """
  # Name resolution: registered name wins over the class-derived default.
  class_default = registry.default_name(type(self))
  super(T2TModel, self).__init__(
      trainable=mode == tf.estimator.ModeKeys.TRAIN,
      name=self.REGISTERED_NAME or class_default)

  # Fill in single-device / first-problem defaults for omitted arguments.
  data_parallelism = (
      eu.Parallelism([""]) if data_parallelism is None else data_parallelism)
  ps_devices = [""] if ps_devices is None else ps_devices
  problem_hparams = (
      hparams.problems[0] if problem_hparams is None else problem_hparams)

  # If vocabularies differ, unset shared_embedding_and_softmax_weights.
  hparams = copy.copy(hparams)  # keep the caller's hparams untouched
  if hparams.shared_embedding_and_softmax_weights:
    vocabs_agree = True
    for prob in hparams.problems:
      has_inputs = "inputs" in prob.input_modality
      if has_inputs and prob.input_modality["inputs"] != prob.target_modality:
        vocabs_agree = False
    if not vocabs_agree:
      tf.logging.info("Unsetting shared_embedding_and_softmax_weights.")
      hparams.shared_embedding_and_softmax_weights = 0

  self._original_hparams = hparams
  self.set_mode(mode)  # presumably derives self._hparams — order matters here
  self._decode_hparams = copy.copy(decode_hparams)
  self._data_parallelism = data_parallelism
  self._num_datashards = data_parallelism.n
  self._ps_devices = ps_devices
  self._problem_hparams = problem_hparams
  self._problem_idx = problem_idx
  self._create_modalities(problem_hparams, self._hparams)
  self._var_store = create_eager_var_store()
def __init__(self,
             hparams,
             mode,
             problem_hparams=None,
             problem_idx=0,
             data_parallelism=None,
             decode_hparams=None):
  """Create the transformer model layer.

  Args:
    hparams: a hyperparameters object.
    mode: the execution mode, as defined in tf.estimator.ModeKeys.
    problem_hparams: a hyperparameters object for the problem; defaults to
      hparams.problems[0] when omitted.
    problem_idx: an integer index of the problem.
    data_parallelism: an expert_utils.Parallelism specifying devices for
      data parallelism; defaults to single-device parallelism.
    decode_hparams: a hyperparameter object with decoding parameters.
  """
  # NOTE(review): the layer name is hard-coded to "transformer" rather than
  # the registry default; the original also computed
  # registry.default_name(type(self)) and discarded it — that dead lookup
  # is removed here. Confirm the fixed name is intentional.
  base.Layer.__init__(
      self,
      trainable=mode == tf.estimator.ModeKeys.TRAIN,
      name="transformer")
  if data_parallelism is None:
    data_parallelism = eu.Parallelism([""])
  if problem_hparams is None:
    problem_hparams = hparams.problems[0]

  # If vocabularies differ, unset shared_embedding_and_softmax_weights.
  hparams = copy.copy(hparams)  # avoid mutating the caller's hparams
  if hparams.shared_embedding_and_softmax_weights:
    same_vocab_sizes = True
    for problem in hparams.problems:
      if "inputs" in problem.input_modality:
        if problem.input_modality["inputs"] != problem.target_modality:
          same_vocab_sizes = False
    if not same_vocab_sizes:
      tf.logging.info("Unsetting shared_embedding_and_softmax_weights.")
      hparams.shared_embedding_and_softmax_weights = 0

  self._original_hparams = hparams
  self.set_mode(mode)  # presumably derives self._hparams — order matters here
  self._decode_hparams = copy.copy(decode_hparams)
  self._data_parallelism = data_parallelism
  self._num_datashards = data_parallelism.n
  self._ps_devices = data_parallelism.ps_devices
  self._problem_hparams = problem_hparams
  self._problem_idx = problem_idx
  self._create_modalities(problem_hparams, self._hparams)
  self._var_store = t2t_model.create_eager_var_store()
  self.attention_weights = dict()  # For visualizing attention heads.
def name(self):
  """Return the registry default name derived from this model's class.

  Bug fix: the original computed the name and discarded it (no `return`),
  so callers always received None.
  """
  return t2t_registry.default_name(self.__class__)