def build_training(self,
                   handle,
                   compute_gradients=True,
                   use_moving_average=False,
                   advance_counters=True,
                   component_weights=None,
                   unroll_using_oracle=None,
                   max_index=-1):
  """Builds a training pipeline.

  Args:
    handle: Handle tensor for the ComputeSession.
    compute_gradients: Whether to generate gradients and an optimizer op.
      When False, build_training will return a 'dry run' training op,
      used normally only for oracle tracing.
    use_moving_average: Whether or not to read from the moving average
      variables instead of the true parameters. Note: it is not possible to
      make gradient updates when this is True.
    advance_counters: Whether or not this loop should increment the
      per-component step counters.
    component_weights: If set, this is a list of relative weights each
      component's cost should get in the pipeline. Defaults to 1.0 for each
      component.
    unroll_using_oracle: If set, this is a list of booleans indicating
      whether or not to use the gold decodings for each component. Defaults
      to True for each component.
    max_index: Training will use only the first max_index components, or -1
      for all components.

  Returns:
    handle: to the ComputeSession, conditioned on completing training step.
    outputs: a dictionary of useful training tensors.

  Raises:
    IndexError: if max_index is positive but out of bounds, or if the
      weight/oracle lists are shorter than max_index.
  """
  check.IsFalse(compute_gradients and use_moving_average,
                'It is not possible to make gradient updates when reading '
                'from the moving average variables.')

  self.read_from_avg = use_moving_average
  if max_index < 0:
    max_index = len(self.components)
  else:
    if not 0 < max_index <= len(self.components):
      raise IndexError(
          'Invalid max_index {} for components {}; handle {}'.format(
              max_index, self.component_names, handle.name))

  # By default, we train every component supervised.
  if not component_weights:
    component_weights = [1] * max_index
  if not unroll_using_oracle:
    unroll_using_oracle = [True] * max_index

  # Validate both per-component lists up front, so a too-short list fails
  # here with a clear message instead of as a bare 'list index out of range'
  # inside the construction loop below.
  if not max_index <= len(unroll_using_oracle):
    raise IndexError(('Invalid max_index {} for unroll_using_oracle {}; '
                      'handle {}').format(max_index, unroll_using_oracle,
                                          handle.name))
  if not max_index <= len(component_weights):
    raise IndexError(('Invalid max_index {} for component_weights {}; '
                      'handle {}').format(max_index, component_weights,
                                          handle.name))

  # Normalize the weights so they sum to 1.0 over the trained components.
  component_weights = component_weights[:max_index]
  total_weight = float(sum(component_weights))
  component_weights = [w / total_weight for w in component_weights]

  unroll_using_oracle = unroll_using_oracle[:max_index]

  logging.info('Creating training target:')
  logging.info('\tWeights: %s', component_weights)
  logging.info('\tOracle: %s', unroll_using_oracle)

  metrics_list = []
  cost = tf.constant(0.)
  effective_batch = tf.constant(0)

  avg_ops = []
  params_to_train = []

  network_states = {}
  for component_index in range(0, max_index):
    comp = self.components[component_index]
    network_states[comp.name] = component.NetworkState()

    logging.info('Initializing data for component "%s"', comp.name)
    handle = dragnn_ops.init_component_data(
        handle, beam_size=comp.training_beam_size, component=comp.name)
    # TODO(googleuser): Phase out component.MasterState.
    master_state = component.MasterState(
        handle, dragnn_ops.batch_size(handle, component=comp.name))

    # Chain each component's subgraph after the previous one's handle and
    # accumulated cost, so components execute in pipeline order.
    with tf.control_dependencies([handle, cost]):
      args = (master_state, network_states)
      if unroll_using_oracle[component_index]:
        # tf.cond invokes both lambdas immediately during graph
        # construction, so closing over `args` here is safe.
        handle, component_cost, component_correct, component_total = (
            tf.cond(comp.training_beam_size > 1,
                    lambda: comp.build_structured_training(*args),
                    lambda: comp.build_greedy_training(*args)))
      else:
        # No oracle: run inference-style unrolling and contribute zero cost.
        handle = comp.build_greedy_inference(*args, during_training=True)
        component_cost = tf.constant(0.)
        component_correct, component_total = tf.constant(0), tf.constant(0)

      weighted_component_cost = tf.multiply(
          component_cost,
          tf.constant(float(component_weights[component_index])),
          name='weighted_component_cost')

      cost += weighted_component_cost
      effective_batch += component_total
      metrics_list += [[component_total], [component_correct]]

      if advance_counters:
        # Make the cost depend on the counter update so the counters are
        # guaranteed to advance whenever the cost is evaluated.
        with tf.control_dependencies(
            [comp.advance_counters(component_total)]):
          cost = tf.identity(cost)

      # Keep track of which parameters will be trained, and any moving
      # average updates to apply for these parameters.
      params_to_train += comp.network.params
      if self.hyperparams.use_moving_average:
        avg_ops += comp.avg_ops

  # Concatenate evaluation results
  metrics = tf.concat(metrics_list, 0)

  # If gradient computation is requested, then:
  # 1. compute the gradients,
  # 2. add an optimizer to update the parameters using the gradients,
  # 3. make the ComputeSession handle depend on the optimizer.
  gradient_norm = tf.constant(0.)
  if compute_gradients:
    logging.info('Creating train op with %d variables:\n\t%s',
                 len(params_to_train),
                 '\n\t'.join([x.name for x in params_to_train]))

    grads_and_vars = self.optimizer.compute_gradients(
        cost, var_list=params_to_train)
    clipped_gradients = [(self._clip_gradients(g), v)
                         for g, v in grads_and_vars]
    gradient_norm = tf.global_norm(list(zip(*clipped_gradients))[0])

    minimize_op = self.optimizer.apply_gradients(
        clipped_gradients, global_step=self.master_vars['step'])

    if self.hyperparams.use_moving_average:
      with tf.control_dependencies([minimize_op]):
        minimize_op = tf.group(*avg_ops)

    # Make sure all the side-effectful minimizations ops finish before
    # proceeding.
    with tf.control_dependencies([minimize_op]):
      handle = tf.identity(handle)

  # Restore so that subsequent builds don't use average by default.
  self.read_from_avg = False

  cost = tf.check_numerics(cost, message='Cost is not finite.')

  # Returns named access to common outputs.
  outputs = {
      'cost': cost,
      'gradient_norm': gradient_norm,
      'batch': effective_batch,
      'metrics': metrics,
  }
  return handle, outputs
def testCheckIsFalse(self):
  # Falsy values of every flavor pass silently: a false comparison, the
  # False literal, empty containers, zero, and None.
  for falsy_value in (1 == 2, False, [], {}, 0, None):
    check.IsFalse(falsy_value, 'foo')

  # Truthy values raise ValueError carrying the supplied message: the True
  # literal, a negated None, a nonzero int, and non-empty containers.
  for truthy_value in (True, not None, 1, [0], {'x': 1}):
    with self.assertRaisesRegexp(ValueError, 'bar'):
      check.IsFalse(truthy_value, 'bar')

  # A caller-specified exception class replaces the default ValueError.
  with self.assertRaisesRegexp(RuntimeError, 'baz'):
    check.IsFalse(' ', 'baz', RuntimeError)