def AddTraining(self,
                task_context,
                batch_size,
                learning_rate=0.1,
                decay_steps=4000,
                momentum=None,
                corpus_name='documents'):
  """Builds the beam-search training sub-graph and returns its nodes.

  Args:
    task_context: path to the task context proto to read input from.
    batch_size: number of sentences processed in parallel.
    learning_rate: initial learning rate for the decay schedule.
    decay_steps: decay period for the learning rate schedule.
    momentum: momentum value for the MomentumOptimizer (must be supplied).
    corpus_name: name of the corpus to read from the task context.

  Returns:
    Dictionary of training graph nodes.
  """
  with tf.name_scope('training'):
    n = self.training
    # Per-batch accumulator for the number of steps each beam stays alive.
    n['accumulated_alive_steps'] = self._AddVariable(
        [batch_size], tf.int32, 'accumulated_alive_steps',
        tf.zeros_initializer())
    n.update(self._AddBeamReader(task_context, batch_size, corpus_name))
    # This adds a required 'step' node too:
    learning_rate = tf.constant(learning_rate, dtype=tf.float32)
    n['learning_rate'] = self._AddLearningRate(learning_rate, decay_steps)
    # Call BuildNetwork *only* to set up the params outside of the main loop.
    self._BuildNetwork(list(n['features']))

    n.update(self._BuildSequence(batch_size, self._max_steps, n['features'],
                                 n['state']))

    # Recover, for every path kept in the beam, the indices of its scores in
    # the flattened score tensor, then sum those scores per path.
    flat_concat_scores = tf.reshape(n['concat_scores'], [-1])
    (indices_and_paths, beams_and_slots, n['gold_slot'],
     n['beam_path_scores']) = gen_parser_ops.beam_parser_output(n['state'])
    n['indices'] = tf.reshape(tf.gather(indices_and_paths, [0]), [-1])
    n['path_ids'] = tf.reshape(tf.gather(indices_and_paths, [1]), [-1])
    n['all_path_scores'] = tf.sparse_segment_sum(
        flat_concat_scores, n['indices'], n['path_ids'])
    n['beam_ids'] = tf.reshape(tf.gather(beams_and_slots, [0]), [-1])
    n.update(AddCrossEntropy(batch_size, n))

    # Optionally restrict training to an explicit subset of the parameters.
    if self._only_train:
      trainable_params = {k: v for k, v in self.params.iteritems()
                          if k in self._only_train}
    else:
      trainable_params = self.params
    for p in trainable_params:
      tf.logging.info('trainable_param: %s', p)

    # L2-regularize all weight and bias parameters.
    regularized_params = [
        tf.nn.l2_loss(p) for k, p in trainable_params.iteritems()
        if k.startswith('weights') or k.startswith('bias')]
    l2_loss = 1e-4 * tf.add_n(regularized_params) if regularized_params else 0

    n['cost'] = tf.add(n['cross_entropy'], l2_loss, name='cost')
    n['gradients'] = tf.gradients(n['cost'], trainable_params.values())

    # Accumulate alive steps, and expose a function that resets the counter.
    with tf.control_dependencies([n['alive_steps']]):
      update_accumulators = tf.group(
          tf.assign_add(n['accumulated_alive_steps'], n['alive_steps']))

    def ResetAccumulators():
      return tf.assign(n['accumulated_alive_steps'],
                       tf.zeros([batch_size], tf.int32))

    n['reset_accumulators_func'] = ResetAccumulators

    # Note: the default momentum=None must be overridden by the caller, since
    # MomentumOptimizer requires a numeric momentum value.
    optimizer = tf.train.MomentumOptimizer(
        n['learning_rate'], momentum, use_locking=self._use_locking)
    train_op = optimizer.minimize(n['cost'],
                                  var_list=trainable_params.values())
    # Register the optimizer's momentum slots so they are zero-initialized
    # and tracked alongside the model variables.
    for param in trainable_params.values():
      slot = optimizer.get_slot(param, 'momentum')
      self.inits[slot.name] = state_ops.init_variable(slot,
                                                      tf.zeros_initializer())
      self.variables[slot.name] = slot

    # Every self._check_every steps, verify that all floating-point
    # parameters are still finite (cf is the control_flow_ops module alias).
    def NumericalChecks():
      return tf.group(*[
          tf.check_numerics(param, message='Parameter is not finite.')
          for param in trainable_params.values()
          if param.dtype.base_dtype in [tf.float32, tf.float64]])

    check_op = cf.cond(tf.equal(tf.mod(self.GetStep(), self._check_every), 0),
                       NumericalChecks, tf.no_op)
    avg_update_op = tf.group(*self._averaging.values())

    # Optionally bundle the numerical checks and moving-average updates with
    # the actual optimizer step, all gated on the accumulator update.
    train_ops = [train_op]
    if self._check_parameters:
      train_ops.append(check_op)
    if self._use_averaging:
      train_ops.append(avg_update_op)
    with tf.control_dependencies([update_accumulators]):
      n['train_op'] = tf.group(*train_ops, name='train_op')
    n['alive_steps'] = tf.identity(n['alive_steps'], name='alive_steps')
    return n
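# A small numeric sketch (hypothetical values, not from the model) of how the
# per-path scores above are assembled: beam_parser_output yields flat indices
# into concat_scores plus the id of the beam path each index belongs to, and
# tf.sparse_segment_sum gathers the indexed scores and sums them per path.
#
#   flat_concat_scores = [0.1, 0.5, 0.2, 0.7, 0.4]
#   indices            = [0, 3, 1, 4]   # positions of each step's score
#   path_ids           = [0, 0, 1, 1]   # beam path owning each position
#   tf.sparse_segment_sum(flat_concat_scores, indices, path_ids)
#     => [0.8, 0.9]                     # path 0: 0.1 + 0.7, path 1: 0.5 + 0.4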
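# A minimal usage sketch. The builder class name, context path, and
# hyperparameter values below are illustrative assumptions, not taken from
# this file; a real setup also needs the feature and embedding configuration
# that the builder's constructor expects.
#
#   builder = StructuredGraphBuilder(...)  # hypothetical construction
#   training = builder.AddTraining(
#       task_context='/path/to/context.pbtxt',
#       batch_size=8,
#       momentum=0.9,          # must be supplied; the default None would fail
#       corpus_name='training-corpus')
#   with tf.Session() as sess:
#     sess.run(builder.inits.values())   # initialize model and slot variables
#     sess.run(training['train_op'])     # one beam-training step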