Example #1
 def __init__(self, params):
   super(BranchLayer, self).__init__(params)
   p = self.params
   assert p.name
   with tf.variable_scope(p.name):
     self.CreateChild('body', p.body)
Example #2
File: step.py Project: shengyushen/lingvo
 def __init__(self, params):
     super(StackStep, self).__init__(params)
     p = params
     with tf.variable_scope(p.name):
         self.sub_steps = []
         self.CreateChildren('sub', p.sub)
Example #3
    def Apply(self, lr, var_grad):
        """For each optimizer, apply the gradient to the variable.

    Args:
      lr: A scalar. The base learning rate.
      var_grad: A `.NestedMap` of (var, grad) pairs.

    Returns:
      The variable update op.

    Raises:
      Exception: When the regex overlaps with or does not cover all variables.
    """
        # Override inherited GetOptimizer even though learning rate is unused.
        tf_optimizer_map = self.GetOptimizer(0)
        var_grad_map = {regex: [] for regex in self._optimizer_map}

        for (v, g) in var_grad.Flatten():
            regex_match = 0
            for regex in self._optimizer_map:
                if re.match(regex, v.name):
                    var_grad_map[regex].append((g, v))
                    regex_match += 1
            if regex_match == 0:
                var_grad_map['default_optimizer'].append((g, v))
            if regex_match > 1:
                raise Exception(
                    'Variable {} is matched {} times by regex {}'.format(
                        v.name, regex_match, list(self._optimizer_map.keys())))

        def _Apply():
            """Use the matched optimizer to apply the gradients."""
            train_ops = []
            non_default_regex = [
                regex for regex in self._optimizer_map
                if regex != 'default_optimizer'
            ]
            for regex in self._optimizer_map:
                if var_grad_map[regex]:
                    opt = tf_optimizer_map[regex]
                    train_ops.append(opt.apply_gradients(var_grad_map[regex]))
                    # pylint: disable=cell-var-from-loop, g-long-lambda
                    if regex == 'default_optimizer':
                        filtered_var_grad = var_grad.FilterKeyVal(
                            lambda k, v: any([
                                re.match(i, v.var.name)
                                for i in non_default_regex
                            ]))
                    else:
                        filtered_var_grad = var_grad.FilterKeyVal(
                            lambda k, v: (re.match(regex, v.var.name)))
                    # pylint: enable=cell-var-from-loop, g-long-lambda
                    self._optimizer_map[regex].AddSummary(
                        self._lr_map[regex], opt, filtered_var_grad)
            return tf.group(*train_ops, name='composite_optimizer_train_op')

        if not py_utils.use_resource_variables():
            var_update_op = _Apply()
        else:
            # Many optimizers, e.g., Adam, Adagrad, etc., create
            # variables. We need to ensure name scope and variable scope are
            # cleared. Otherwise, tpu.batch_parallel does not work.
            var_reuse = False
            if py_utils.GetOpportunisticVariableReuse():
                var_reuse = tf.AUTO_REUSE
            with tf.name_scope(None):
                with tf.variable_scope(
                        tf.VariableScope(use_resource=True, reuse=var_reuse)):
                    var_update_op = _Apply()
        return var_update_op
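
A note on Example #3: the matching loop assigns each (grad, var) pair to exactly one optimizer, falling back to 'default_optimizer' when nothing matches and raising if two regexes overlap. Below is a minimal plain-Python sketch of just that partitioning step (the variable names and regexes are made up for illustration; this is not the Lingvo API):

import re

def partition_by_regex(named_grads, optimizer_regexes):
  # Mirror of the loop in Apply(): each variable may match at most one regex;
  # unmatched variables go to the 'default_optimizer' bucket.
  buckets = {regex: [] for regex in optimizer_regexes}
  for var_name, grad in named_grads:
    matches = [r for r in optimizer_regexes if re.match(r, var_name)]
    if len(matches) > 1:
      raise Exception('Variable {} is matched {} times by regex {}'.format(
          var_name, len(matches), list(optimizer_regexes)))
    bucket = matches[0] if matches else 'default_optimizer'
    buckets[bucket].append((grad, var_name))
  return buckets

# e.g. encoder variables get a dedicated optimizer, everything else the default.
partition_by_regex([('encoder/w:0', 0.1), ('decoder/w:0', 0.2)],
                   ['encoder/.*', 'default_optimizer'])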
Example #4
  def __init__(self, params):
    super(MTEncoderV1, self).__init__(params)
    p = self.params
    assert not p.packed_input, ('Packed inputs are not yet supported for '
                                'MTEncoderV1.')

    with tf.variable_scope(p.name):
      if p.cc_schedule is not None:
        self.CreateChild('cc_schedule', p.cc_schedule)

      self.CreateChild('emb', p.emb)

      rnn_layers_params = []

      # L0 is a bi-directional lstm.

      # L0's forward lstm cell
      if p.lstm_tpl_bidi is None:
        params = p.lstm_tpl.Copy()
      else:
        params = p.lstm_tpl_bidi.Copy()
      params.name = 'L0_rnn_fwd'
      params.num_input_nodes = p.emb.embedding_dim
      params.num_output_nodes = p.lstm_cell_size
      forward_lstm = params

      # L0's backward lstm cell
      params = params.Copy()
      params.name = 'L0_rnn_bak'
      backward_lstm = params

      # L0 layer.
      params = model_helper.CreateBidirectionalRNNParams(
          self.params, forward_lstm, backward_lstm)
      params.name = 'L0'
      rnn_layers_params.append(params)

      # The latter layers are all uni-directional lstm.
      input_size = 2 * p.lstm_cell_size
      for i in range(1, p.num_lstm_layers):
        # Forward lstm cell.
        if p.lstm_tpl_uni is None:
          cell = p.lstm_tpl.Copy()
        else:
          cell = p.lstm_tpl_uni.Copy()
        cell.name = 'L%d_rnn' % i
        cell.num_input_nodes = input_size
        cell.num_output_nodes = p.lstm_cell_size
        # Forward lstm layer.
        params = model_helper.CreateUnidirectionalRNNParams(self.params, cell)
        params.name = 'L%d' % i
        rnn_layers_params.append(params)
        input_size = p.lstm_cell_size

      self.CreateChildren('rnn', rnn_layers_params)

      dropout_p = layers.DropoutLayer.Params().Set(
          name='dropout_layer',
          keep_prob=1.0 - p.dropout_prob,
          random_seed=p.random_seed + 84828474 if p.random_seed else None)
      self.CreateChild('dropout', dropout_p)
Example #5
File: step.py Project: shengyushen/lingvo
 def __init__(self, params):
     super(StatelessLayerStep, self).__init__(params)
     p = params
     with tf.variable_scope(p.name):
         self.CreateChild('layer', p.layer)
Example #6
File: encoder.py Project: k1eira/lingvo
    def __init__(self, params):
        super(AsrEncoder, self).__init__(params)
        p = self.params
        name = p.name

        with tf.variable_scope(name):
            # Use specAugment or not.
            if p.use_specaugment:
                self.CreateChild('specaugment', p.specaugment_network.Copy())
            # First create the conv layers.

            assert p.num_cnn_layers == len(p.conv_filter_shapes)
            assert p.num_cnn_layers == len(p.conv_filter_strides)
            params_conv_layers = []
            for i in range(p.num_cnn_layers):
                conv_p = p.cnn_tpl.Copy()
                conv_p.name = 'conv_L%d' % (i)
                conv_p.filter_shape = p.conv_filter_shapes[i]
                conv_p.filter_stride = p.conv_filter_strides[i]
                conv_p.is_eval = p.is_eval
                params_conv_layers.append(conv_p)
            self.CreateChildren('conv', params_conv_layers)

            conv_output_shape = p.input_shape
            for i in range(p.num_cnn_layers):
                conv_output_shape = self.conv[i].OutShape(conv_output_shape)
            assert len(
                conv_output_shape) == 4  # batch, height, width, channel.

            params_conv_lstm_rnn = []
            params_conv_lstm_cnn = []
            for i in range(p.num_conv_lstm_layers):
                # NOTE(yonghui): We assume that output from ConvLSTMBlock has the same
                # shape as its input.
                _, _, width, in_channel = conv_output_shape
                f_conv_lstm_p = p.conv_lstm_tpl.Copy()
                f_conv_lstm_p.name = 'f_conv_lstm_%d' % (i)
                f_conv_lstm_p.inputs_shape = [None, 1, width, in_channel]
                f_conv_lstm_p.cell_shape = [None, 1, width, in_channel]
                b_conv_lstm_p = f_conv_lstm_p.Copy()
                b_conv_lstm_p.name = 'b_conv_lstm_%d' % (i)
                conv_lstm_rnn_p = self.CreateConvLstmLayerParams()
                conv_lstm_rnn_p.name = 'conv_lstm_rnn'
                conv_lstm_rnn_p.fwd = f_conv_lstm_p
                conv_lstm_rnn_p.bak = b_conv_lstm_p
                params_conv_lstm_rnn.append(conv_lstm_rnn_p)
                cnn_p = p.after_conv_lstm_cnn_tpl.Copy()
                cnn_p.name = 'conv_lstm_cnn_%d' % (i)
                cnn_p.filter_shape[2] = 2 * in_channel
                cnn_p.filter_shape[3] = in_channel
                params_conv_lstm_cnn.append(cnn_p)
                # TODO(yonghui): Refactor ConvLSTMBlock into a layer.
            self.CreateChildren('conv_lstm_rnn', params_conv_lstm_rnn)
            self.CreateChildren('conv_lstm_cnn', params_conv_lstm_cnn)

            (self._first_lstm_input_dim, self._first_lstm_input_dim_pad
             ) = self.FirstLstmLayerInputDimAndPadding(conv_output_shape,
                                                       pad_to_multiple=16)

            # Now create all the rnn layers and projection layers.
            # TODO(yonghui): take care of device placement.
            params_rnn_layers = []
            params_proj_layers = []
            params_highway_skip_layers = []
            output_dim = self._first_lstm_input_dim
            for i in range(p.num_lstm_layers):
                input_dim = output_dim
                forward_p = p.lstm_tpl.Copy()
                forward_p.name = 'fwd_rnn_L%d' % (i)
                forward_p.num_input_nodes = input_dim
                forward_p.num_output_nodes = p.lstm_cell_size
                backward_p = forward_p.Copy()
                backward_p.name = 'bak_rnn_L%d' % (i)
                rnn_p = self.CreateBidirectionalRNNParams(
                    forward_p, backward_p)
                rnn_p.name = 'brnn_L%d' % (i)
                params_rnn_layers.append(rnn_p)
                output_dim = 2 * p.lstm_cell_size

                if p.project_lstm_output and (i < p.num_lstm_layers - 1):
                    proj_p = p.proj_tpl.Copy()
                    proj_p.input_dim = 2 * p.lstm_cell_size
                    proj_p.output_dim = 2 * p.lstm_cell_size
                    proj_p.name = 'proj_L%d' % (i)
                    proj_p.is_eval = p.is_eval
                    params_proj_layers.append(proj_p)

                # add the skip layers
                residual_index = i - p.residual_start + 1
                if p.residual_start > 0 and residual_index >= 0 and p.highway_skip:
                    highway_skip = p.highway_skip_tpl.Copy()
                    highway_skip.name = 'enc_hwskip_%d' % len(
                        params_highway_skip_layers)
                    highway_skip.input_dim = 2 * p.lstm_cell_size
                    params_highway_skip_layers.append(highway_skip)
                # Adds the stacking layer.
                if p.layer_index_before_stacking == i:
                    stacking_layer = p.stacking_layer_tpl.Copy()
                    stacking_layer.name = 'stacking_%d' % (i)
                    self.CreateChild('stacking', stacking_layer)
                    stacking_window_len = (p.stacking_layer_tpl.left_context +
                                           1 +
                                           p.stacking_layer_tpl.right_context)
                    output_dim *= stacking_window_len

            self.CreateChildren('rnn', params_rnn_layers)
            self.CreateChildren('proj', params_proj_layers)
            self.CreateChildren('highway_skip', params_highway_skip_layers)
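
FirstLstmLayerInputDimAndPadding is not shown in Example #6. Assuming it flattens the width and channel dims of the final conv output and pads the result up to pad_to_multiple, a rough sketch (an assumption for illustration, not the actual Lingvo implementation) could look like:

def first_lstm_input_dim_and_padding(conv_output_shape, pad_to_multiple=16):
  # conv_output_shape is (batch, height, width, channel); the first LSTM layer
  # consumes width * channel features per frame, padded up to a multiple of
  # pad_to_multiple.
  _, _, width, channel = conv_output_shape
  dim = width * channel
  pad = (-dim) % pad_to_multiple
  return dim + pad, pad

first_lstm_input_dim_and_padding((None, None, 13, 10))  # -> (144, 14)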
Example #7
  def __init__(self, params):
    assert issubclass(params.cls, BaseTask)
    # Ensure global_step exists before calling super.
    py_utils.GetOrCreateGlobalStepVar()
    super().__init__(params)

    p = self.params

    self._encoder = None
    self._online_encoder = None
    self._decoder = None

    self._loss = None
    self._num_predictions = None
    self._train_op = None
    self._post_train_ops = []
    self._eval_metrics = {}
    self._per_example = {}

    # Create the gradient mask.
    self._per_input_gradient_mask = None

    if p.task_global_step:
      with tf.name_scope(None), tf.variable_scope(
          py_utils.GetGlobalVariableScope()):
        var_name = p.name + '_global_step'
        # Create the variable immediately.
        self._CreateVariableInternal(
            var_name,
            base_layer.CreateVariableMeta(
                var_params=py_utils.WeightParams(
                    [], py_utils.WeightInit.Constant(0), tf.int64),
                theta_fn=None,
                kwargs=dict(
                    trainable=False,
                    collections=[tf.GraphKeys.GLOBAL_VARIABLES])))
        summary_utils.scalar(var_name, self._private_vars[var_name])
        self._global_step_var = self._private_vars[var_name]
    else:
      self._global_step_var = py_utils.GetOrCreateGlobalStepVar()

    if p.input:
      # TODO(zhifengc): Consider a simpler way to ensure the input
      # generator stops after one epoch.
      if self.do_eval and p.eval:
        seq_inp = issubclass(p.input.cls,
                             base_input_generator.BaseInputGeneratorFromFiles)
        if p.input.num_samples > 0:
          if (p.eval.samples_per_summary == 0) or (p.input.num_samples <
                                                   p.eval.samples_per_summary):
            p.eval.samples_per_summary = p.input.num_samples
            # If we know the dataset size and we want to evaluate the full
            # set, we need to coordinate the input generator to flush out
            # all samples so the evaler and decoder compute metrics on the
            # whole set for each summary step.
            if seq_inp:
              p.input.flush_every_n = p.input.num_samples
          if p.eval.decoder_samples_per_summary is not None and (
              p.eval.decoder_samples_per_summary > p.input.num_samples):
            p.eval.decoder_samples_per_summary = p.input.num_samples
        if p.input.eval_samples_per_summary is not None:
          p.eval.samples_per_summary = p.input.eval_samples_per_summary
        if p.input.decoder_samples_per_summary is not None:
          p.eval.decoder_samples_per_summary = (
              p.input.decoder_samples_per_summary)
        if p.input.num_samples == 0 and not p.input.resettable:
          # Dataset size is unknown and the input is not resettable, so an
          # explicit, positive samples_per_summary is required.
          assert p.eval.samples_per_summary > 0
        if seq_inp and p.input.num_batcher_threads > 1:
          tf.logging.warning(
              'input.num_batcher_threads > 1 inside eval mode.  '
              'The input generator may not iterate over exactly '
              'one epoch per run')
      tf.logging.info('input_params: %s', p.input)
      input_params = self.cluster.PlaceInput(p.input)

      # For TPU training, we create the input generator in a
      # different scope and AddChild it in later.
      if 'skip_create_child' not in p.input:
        self.CreateChild('input', input_params)

    tp = p.train

    # p.train can be None if this task is the teacher/student task in a
    # DistillationTask.
    if tp:
      self._SetLearnerFromLegacyParams(tp)
      if tp.learner is not None:
        if isinstance(tp.learner, (list, tuple)):
          self.CreateChildren('learners', tp.learner)
        else:
          self.CreateChildren('learners', [tp.learner])
    self._UpdateVnConfig()
Example #8
 def __init__(self, params):
     super(PointDetectorBase, self).__init__(params)
     p = self.params
     self._utils_3d = detection_3d_lib.Utils3D()
     with tf.variable_scope(p.name):
         self.CreateChild('output_decoder', p.output_decoder)
Example #9
 def __init__(self, params):
   super(RnnStep, self).__init__(params)
   p = params
   with tf.variable_scope(p.name):
     self.CreateChild('cell', p.cell)
Example #10
 def _CreateChildrenVariables(self):
   if self.params.shared_emb:
     with tf.variable_scope('shared_emb', reuse=tf.AUTO_REUSE):
       self.softmax.InstantiateVariables()
   super()._CreateChildrenVariables()
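
In Example #10, reuse=tf.AUTO_REUSE is what lets the shared softmax pick up the same variables as the embedding. A standalone TF1-style sketch of the mechanism (the variable name and shape are made up):

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

def shared_emb_var():
  # The first call creates 'shared_emb/emb'; later calls return the same
  # variable instead of raising a reuse error.
  with tf.variable_scope('shared_emb', reuse=tf.AUTO_REUSE):
    return tf.get_variable('emb', shape=[8, 4])

assert shared_emb_var() is shared_emb_var()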
Example #11
 def __init__(self, params):
   super(TestTask, self).__init__(params)
   p = self.params
   with tf.variable_scope(p.name):
     self.CreateChild('encoder', p.encoder)
Example #12
    def _resource_apply_dense(self, grad, var):
        if grad is None:
            tf.logging.warning('Gradient is None for variable %s' % var.name)
            return []

        grad_dtype = var.dtype  # TODO(lepikhin): add to params
        grad = tf.cast(grad, grad_dtype)
        factored_dims = self._factored_dims(var.shape.as_list())
        if factored_dims:
            vr = self.get_slot(var, 'vr')
            vc = self.get_slot(var, 'vc')
        else:
            v = self.get_slot(var, 'v')
        if self._beta1:
            m = self.get_slot(var, 'm')

        cond = tf.constant(True)

        def _Upd(c, x):
            if not self._cond_is_finite:
                return c
            c = tf.math.logical_and(c, tf.reduce_all(tf.math.is_finite(x)))
            c = tf.math.logical_and(
                c, tf.reduce_all(tf.math.logical_not(tf.math.is_inf(x))))
            return c

        def _Wrap(fn, x, y):
            if not self._cond_is_finite:
                return fn(x, y)
            return tf.cond(cond, lambda: fn(x, y), lambda: x)

        with tf.variable_scope(var.name[:-2] + '/Adafactor'):
            grad_squared = tf.math.square(grad) + tf.cast(
                self._epsilon1, grad_dtype)
            cond = _Upd(cond, grad_squared)
            decay_rate = tf.cast(self._decay_rate, var.dtype)
            old_val = tf.identity(
                var)  # TODO(lepikhin): introduce gradient dtype
            if self._multiply_by_parameter_scale:
                update_scale = self._parameter_scale(old_val) * tf.cast(
                    self._learning_rate, grad_dtype)
            else:
                update_scale = self._learning_rate
            mixing_rate = tf.cast(1.0 - decay_rate, grad_dtype)
            update_scale = tf.cast(update_scale, grad_dtype)
            updates = []
            if factored_dims:
                d0, d1 = factored_dims
                vr_axis, vc_axis = d0, d1
                grad_squared_row_mean = tf.reduce_mean(grad_squared,
                                                       axis=vr_axis)
                grad_squared_col_mean = tf.reduce_mean(grad_squared,
                                                       axis=vc_axis)
                # new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
                new_vr = vr * decay_rate + grad_squared_row_mean * mixing_rate
                # new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
                new_vc = vc * decay_rate + grad_squared_col_mean * mixing_rate
                cond = _Upd(cond, new_vr)
                cond = _Upd(cond, new_vc)
                vr_update = _Wrap(tf.assign, vr, new_vr)
                vc_update = _Wrap(tf.assign, vc, new_vc)
                updates.extend([vr_update, vc_update])
                long_term_mean = tf.reduce_mean(new_vr, -1, keepdims=True)
                r_factor = tf.math.rsqrt(new_vr / long_term_mean)
                c_factor = tf.math.rsqrt(new_vc)
                x = grad * tf.expand_dims(r_factor, vr_axis) * tf.expand_dims(
                    c_factor, vc_axis)
            else:
                new_v = v * decay_rate + grad_squared * mixing_rate
                cond = _Upd(cond, new_v)
                v_update = _Wrap(tf.assign, v, new_v)
                updates.append(v_update)
                x = grad * tf.math.rsqrt(new_v)
            if self._clipping_threshold is not None:
                clipping_denom = tf.maximum(
                    tf.constant(1.0, grad_dtype),
                    _ReduceRms(x) /
                    tf.constant(self._clipping_threshold, grad_dtype))
                x /= clipping_denom
            subtrahend = x * update_scale
            if self._beta1:
                new_m = (m * tf.constant(self._beta1, dtype=grad_dtype) +
                         subtrahend *
                         tf.constant(1.0 - self._beta1, dtype=grad_dtype))
                subtrahend = new_m
                cond = _Upd(cond, new_m)
                updates.append(_Wrap(tf.assign, m, new_m))
            # It is critical to use assign_sub instead of tf.assign(var - subtrahend)
            #  for the case of bfloat16 activations, so as to avoid repeatedly
            #  rounding the slice value, which results in poor quality.
            cond = _Upd(cond, subtrahend)
            var_update = _Wrap(tf.assign_sub, var, subtrahend)
            updates.append(var_update)
            return tf.group(*updates)
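
The factored branch of Example #12 stores only row and column second-moment accumulators rather than a full-sized slot. The NumPy sketch below walks through that arithmetic for a 2-D weight (constants and axis choices are illustrative; the parameter-scale option, beta1 momentum, and finiteness guards are omitted):

import numpy as np

def adafactor_factored_step(grad, vr, vc, decay_rate=0.8, epsilon1=1e-30,
                            learning_rate=0.01, clipping_threshold=1.0):
  # Row/column accumulators approximate the full second moment of the
  # gradient, mirroring new_vr / new_vc / r_factor / c_factor above.
  grad_squared = np.square(grad) + epsilon1
  mixing_rate = 1.0 - decay_rate
  new_vr = vr * decay_rate + grad_squared.mean(axis=1) * mixing_rate  # per row
  new_vc = vc * decay_rate + grad_squared.mean(axis=0) * mixing_rate  # per col
  r_factor = 1.0 / np.sqrt(new_vr / new_vr.mean())
  c_factor = 1.0 / np.sqrt(new_vc)
  x = grad * r_factor[:, None] * c_factor[None, :]
  rms = np.sqrt(np.mean(np.square(x)))         # analogue of _ReduceRms(x)
  x /= max(1.0, rms / clipping_threshold)
  return learning_rate * x, new_vr, new_vc     # subtrahend: var -= step

grad = np.random.default_rng(0).normal(size=(4, 3))
step, vr, vc = adafactor_factored_step(grad, np.zeros(4), np.zeros(3))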
Example #13
    def try_apply_dense(self, grad, var):
        assert grad is not None

        cond = tf.constant(True)
        is_finite_checks = []
        stats = {}

        grad_dtype = var.dtype  # TODO(lepikhin): add to params
        grad = tf.cast(grad, grad_dtype)
        factored_dims = self._factored_dims(var.shape.as_list())
        if factored_dims:
            vr = self.get_slot(var, 'vr')
            vc = self.get_slot(var, 'vc')
        else:
            v = self.get_slot(var, 'v')
        if self._beta1:
            m = self.get_slot(var, 'm')

        def _Upd(c, k, x):
            stats[k] = x
            is_finite_checks.append(tf.reduce_all(tf.math.is_finite(x)))
            return c

        with tf.variable_scope(var.name[:-2] + '/Adafactor'):
            grad_squared = tf.math.square(grad) + tf.cast(
                self._epsilon1, grad_dtype)
            cond = _Upd(cond, 'grad_squared', grad_squared)  # 0 (factored)
            decay_rate = tf.cast(self._decay_rate, var.dtype)
            old_val = tf.identity(
                var)  # TODO(lepikhin): introduce gradient dtype
            assert self._multiply_by_parameter_scale
            if self._multiply_by_parameter_scale:
                parameter_scale = self._parameter_scale(old_val)
                cond = _Upd(cond, 'parameter_scale',
                            parameter_scale)  # 1 (factored)
                update_scale = self._parameter_scale(old_val) * tf.cast(
                    self._learning_rate, grad_dtype)

            else:
                update_scale = self._learning_rate
            mixing_rate = tf.cast(1.0 - decay_rate, grad_dtype)
            update_scale = tf.cast(update_scale, grad_dtype)
            if factored_dims:
                d0, d1 = factored_dims
                vr_axis, vc_axis = d0, d1
                grad_squared_row_mean = tf.reduce_mean(grad_squared,
                                                       axis=vr_axis)
                grad_squared_col_mean = tf.reduce_mean(grad_squared,
                                                       axis=vc_axis)
                # new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
                new_vr = vr * decay_rate + grad_squared_row_mean * mixing_rate
                # new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
                new_vc = vc * decay_rate + grad_squared_col_mean * mixing_rate
                cond = _Upd(cond, 'new_vr', new_vr)  # 2 (factored)
                cond = _Upd(cond, 'new_vc', new_vc)  # 3 (factored)
                # vr_update = _Wrap(tf.assign, vr, new_vr)
                # vc_update = _Wrap(tf.assign, vc, new_vc)
                # updates.extend([vr_update, vc_update])
                long_term_mean = tf.reduce_mean(new_vr, -1, keepdims=True)
                r_factor = tf.math.rsqrt(new_vr / long_term_mean)
                c_factor = tf.math.rsqrt(new_vc)
                mult = tf.expand_dims(r_factor, vr_axis) * tf.expand_dims(
                    c_factor, vc_axis)
                cond = _Upd(cond, 'mult', mult)  # 4 (factored)
                x = grad * mult
            else:
                new_v = v * decay_rate + grad_squared * mixing_rate
                cond = _Upd(cond, 'new_v', new_v)
                # v_update = _Wrap(tf.assign, v, new_v)
                # updates.append(v_update)
                x = grad * tf.math.rsqrt(new_v)

            assert self._clipping_threshold is not None

            if self._clipping_threshold is not None:
                clipping_denom = tf.maximum(
                    tf.constant(1.0, grad_dtype),
                    _ReduceRms(x) /
                    tf.constant(self._clipping_threshold, grad_dtype))
                x /= clipping_denom
            cond = _Upd(cond, 'x', x)
            subtrahend = x * update_scale
            if self._beta1:
                new_m = (m * tf.constant(self._beta1, dtype=grad_dtype) +
                         subtrahend *
                         tf.constant(1.0 - self._beta1, dtype=grad_dtype))
                subtrahend = new_m
                cond = _Upd(cond, 'new_m', new_m)
                # updates.append(_Wrap(tf.assign, m, new_m))

            # It is critical to use assign_sub instead of tf.assign(var - subtrahend)
            #  for the case of bfloat16 activations, so as to avoid repeatedly
            #  rounding the slice value, which results in poor quality.
            cond = _Upd(cond, 'subtrahend', subtrahend)  # 5 (factored)

            # var_update = _Wrap(tf.assign_sub, var, subtrahend)
            # updates.append(var_update)

            return is_finite_checks, stats
Example #14
File: pillars.py Project: xueyongfu/lingvo
 def __init__(self, params):
     super(PointsToGridFeaturizer, self).__init__(params)
     p = self.params
     with tf.variable_scope(p.name):
         self.CreateChild('featurizer', p.featurizer)
Example #15
 def __init__(self, params):
   super(BatchParallelLayer, self).__init__(params)
   p = self.params
   assert p.name
   with tf.variable_scope(p.name):
     self.CreateChild('sub', p.sub)
Example #16
 def __init__(self, params):
     super(ParallelStep, self).__init__(params)
     p = params
     with tf.variable_scope(p.name):
         self.CreateChildren('sub', p.sub)
Example #17
 def _CreateChildrenVariables(self):
     with tf.variable_scope(self.params.name):
         with py_utils.VariableShapePrefixContext(self.params.repeat):
             self.body.InstantiateVariables()
     super()._CreateChildrenVariables()
Example #18
  def __init__(self, params):
    super(MTEncoderBiRNN, self).__init__(params)
    p = self.params

    with tf.variable_scope(p.name):
      if p.cc_schedule is None:
        self.cc_schedule = None
      else:
        self.CreateChild('cc_schedule', p.cc_schedule)

      self.CreateChild('emb', p.emb)

      rnn_layers_params = []

      for i in range(p.num_lstm_layers):
        params = p.lstm_tpl.Copy()
        params.name = 'L%d_rnn_fwd' % i
        if i == 0:
          params.num_input_nodes = p.emb.embedding_dim
        else:
          params.num_input_nodes = 2 * p.lstm_cell_size
        params.num_output_nodes = p.lstm_cell_size
        params.reset_cell_state = p.packed_input
        forward_lstm = params

        params = params.Copy()
        params.name = 'L%d_rnn_bak' % i
        params.reset_cell_state = p.packed_input
        backward_lstm = params

        params = model_helper.CreateBidirectionalRNNParams(
            self.params, forward_lstm, backward_lstm)
        params.packed_input = p.packed_input
        params.name = 'L%d' % i
        rnn_layers_params.append(params)

      self.CreateChildren('rnn', rnn_layers_params)

      if p.lstm_cell_size * 2 != p.encoder_out_dim:
        # Project the encoder output to the desired dim.
        proj_p = p.proj_tpl.Copy().Set(
            name='proj',
            batch_norm=False,
            input_dim=p.lstm_cell_size * 2,
            output_dim=p.encoder_out_dim)
        if p.cc_schedule is not None:
          proj_p.has_bias = False
          proj_p.activation = 'TANH'
        else:
          proj_p.has_bias = True
          proj_p.activation = 'NONE'
        self.CreateChild('final_proj', proj_p)

      dropout_p = layers.DropoutLayer.Params().Set(
          name='dropout_layer',
          keep_prob=1.0 - p.dropout_prob,
          random_seed=p.random_seed + 827366448 if p.random_seed else None)
      self.CreateChild('dropout', dropout_p)

      if p.is_transparent:
        transparent_params = p.transparent_merger_tpl.Copy()
        transparent_params.name = 'transparent'
        transparent_params.num_sources = p.num_lstm_layers
        self.CreateChild('transparent_merger', transparent_params)
Example #19
File: model.py Project: xueyongfu/lingvo
  def __init__(self, params):
    super(InsertionModel, self).__init__(params)
    p = self.params

    with tf.variable_scope(p.name):
      self.CreateChild('insertion', p.insertion)
Example #20
 def _CreateLayerVariables(self):
   # Save a scope for lazily created variables.
   with tf.variable_scope('q'):
     self._qvars_scope = tf.get_variable_scope()
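
Example #20 stashes the VariableScope object so that variables created later still end up under the 'q/' prefix. A minimal TF1-style sketch of that pattern (the class and variable names are made up):

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

class LazyQVars:
  def __init__(self):
    # Save the scope now; variables are only created on first use.
    with tf.variable_scope('q'):
      self._qvars_scope = tf.get_variable_scope()

  def get(self, name, shape):
    # Re-enter the saved scope so the variable is named 'q/<name>'.
    with tf.variable_scope(self._qvars_scope, reuse=tf.AUTO_REUSE):
      return tf.get_variable(name, shape=shape)

qvars = LazyQVars()
qvars.get('w', [2, 2]).name  # 'q/w:0'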