  # Imports needed by this method (from the enclosing module; the
  # gen_parser_ops path below is the SyntaxNet layout and may differ):
  #   import tensorflow as tf
  #   from tensorflow.python.ops import control_flow_ops as cf
  #   from tensorflow.python.ops import state_ops
  #   from syntaxnet.ops import gen_parser_ops
  def AddTraining(self,
                  task_context,
                  batch_size,
                  learning_rate=0.1,
                  decay_steps=4000,
                  momentum=None,
                  corpus_name='documents'):
    """Builds the training graph: beam reader, cost, accumulators, train op."""
    with tf.name_scope('training'):
      n = self.training
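      # Per-sentence count of beam steps that stayed alive, accumulated across
      # train-op runs and cleared via reset_accumulators_func below.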
      n['accumulated_alive_steps'] = self._AddVariable(
          [batch_size], tf.int32, 'accumulated_alive_steps',
          tf.zeros_initializer())
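      # Read a batch of sentences from `corpus_name` and set up the beam state.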
      n.update(self._AddBeamReader(task_context, batch_size, corpus_name))
      # This adds a required 'step' node too:
      learning_rate = tf.constant(learning_rate, dtype=tf.float32)
      n['learning_rate'] = self._AddLearningRate(learning_rate, decay_steps)
      # Call BuildNetwork *only* to set up the params outside of the main loop.
      self._BuildNetwork(list(n['features']))

      n.update(self._BuildSequence(batch_size, self._max_steps, n['features'],
                                   n['state']))

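      # Map every score in every beam back to the path it belongs to, then sum
      # the scores along each path.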
      flat_concat_scores = tf.reshape(n['concat_scores'], [-1])
      (indices_and_paths, beams_and_slots, n['gold_slot'],
       n['beam_path_scores']) = gen_parser_ops.beam_parser_output(n['state'])
      n['indices'] = tf.reshape(tf.gather(indices_and_paths, [0]), [-1])
      n['path_ids'] = tf.reshape(tf.gather(indices_and_paths, [1]), [-1])
      n['all_path_scores'] = tf.sparse_segment_sum(
          flat_concat_scores, n['indices'], n['path_ids'])
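      # tf.sparse_segment_sum(data, indices, segment_ids) sums data[indices[i]]
      # over entries sharing a segment id; e.g. (hypothetical values)
      # data=[10., 20., 30.], indices=[0, 2, 1], segment_ids=[0, 0, 1]
      # yields [40., 20.], one total score per beam path.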
      n['beam_ids'] = tf.reshape(tf.gather(beams_and_slots, [0]), [-1])
      n.update(AddCrossEntropy(batch_size, n))

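      # If a whitelist was given, train only the parameters named in it.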
      if self._only_train:
        trainable_params = {k: v for k, v in self.params.items()
                            if k in self._only_train}
      else:
        trainable_params = self.params
      for p in trainable_params:
        tf.logging.info('trainable_param: %s', p)

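      # Apply L2 weight decay (fixed at 1e-4 here) to weight and bias params.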
      regularized_params = [
          tf.nn.l2_loss(p) for k, p in trainable_params.items()
          if k.startswith('weights') or k.startswith('bias')]
      l2_loss = 1e-4 * tf.add_n(regularized_params) if regularized_params else 0

      n['cost'] = tf.add(n['cross_entropy'], l2_loss, name='cost')

      n['gradients'] = tf.gradients(n['cost'], list(trainable_params.values()))

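      # Each time the train op runs, add this batch's alive-step counts to the
      # running accumulator.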
      with tf.control_dependencies([n['alive_steps']]):
        update_accumulators = tf.group(
            tf.assign_add(n['accumulated_alive_steps'], n['alive_steps']))

      def ResetAccumulators():
        return tf.assign(
            n['accumulated_alive_steps'], tf.zeros([batch_size], tf.int32))
      n['reset_accumulators_func'] = ResetAccumulators

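      # Note: `momentum` defaults to None, so callers must pass a numeric value
      # (e.g. 0.9) or MomentumOptimizer will fail when building its update.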
      optimizer = tf.train.MomentumOptimizer(n['learning_rate'],
                                             momentum,
                                             use_locking=self._use_locking)
      train_op = optimizer.minimize(n['cost'],
                                    var_list=list(trainable_params.values()))
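      # Register each momentum slot so it is zero-initialized and tracked with
      # the model's other variables.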
      for param in trainable_params.values():
        slot = optimizer.get_slot(param, 'momentum')
        self.inits[slot.name] = state_ops.init_variable(slot,
                                                        tf.zeros_initializer())
        self.variables[slot.name] = slot

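      # Every `_check_every` steps, assert that all float parameters are finite.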
      def NumericalChecks():
        return tf.group(*[
            tf.check_numerics(param, message='Parameter is not finite.')
            for param in trainable_params.values()
            if param.dtype.base_dtype in [tf.float32, tf.float64]])
      check_op = cf.cond(tf.equal(tf.mod(self.GetStep(), self._check_every), 0),
                         NumericalChecks, tf.no_op)
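      # Refresh the moving averages of the parameters kept in self._averaging.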
      avg_update_op = tf.group(*self._averaging.values())
      train_ops = [train_op]
      if self._check_parameters:
        train_ops.append(check_op)
      if self._use_averaging:
        train_ops.append(avg_update_op)
      with tf.control_dependencies([update_accumulators]):
        n['train_op'] = tf.group(*train_ops, name='train_op')
      n['alive_steps'] = tf.identity(n['alive_steps'], name='alive_steps')
    return n
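
A minimal driving loop, for orientation only: the builder class name, its
constructor, and the context path are assumptions, not part of the listing
above; only AddTraining's signature and the inits/train_op/cost nodes come
from the code itself.

    # Hypothetical usage sketch (assumes a StructuredGraphBuilder-like class).
    builder = StructuredGraphBuilder(...)  # assumed constructor
    n = builder.AddTraining('/path/to/context.pbtxt', batch_size=8,
                            momentum=0.9)  # momentum must be numeric
    with tf.Session() as sess:
      sess.run(list(builder.inits.values()))  # initialize variables and slots
      for _ in range(1000):
        _, cost = sess.run([n['train_op'], n['cost']])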