Example #1
    def build_training(self,
                       handle,
                       compute_gradients=True,
                       use_moving_average=False,
                       advance_counters=True,
                       component_weights=None,
                       unroll_using_oracle=None,
                       max_index=-1):
        """Builds a training pipeline.

    Args:
      handle: Handle tensor for the ComputeSession.
      compute_gradients: Whether to generate gradients and an optimizer op.
        When False, build_training will return a 'dry run' training op,
        used normally only for oracle tracing.
      use_moving_average: Whether or not to read from the moving
        average variables instead of the true parameters. Note: it is not
        possible to make gradient updates when this is True.
      advance_counters: Whether or not this loop should increment the
        per-component step counters.
      component_weights: If set, this is a list of relative weights
        each component's cost should get in the pipeline. Defaults to 1.0 for
        each component.
      unroll_using_oracle: If set, this is a list of booleans indicating
        whether or not to use the gold decodings for each component. Defaults
        to True for each component.
      max_index: Training will use only the first max_index components,
        or -1 for all components.

    Returns:
      handle: to the ComputeSession, conditioned on completing training step.
      outputs: a dictionary of useful training tensors.

    Raises:
      IndexError: if max_index is positive but out of bounds.
    """
        check.IsFalse(
            compute_gradients and use_moving_average,
            'It is not possible to make gradient updates when reading '
            'from the moving average variables.')

        self.read_from_avg = use_moving_average
        if max_index < 0:
            max_index = len(self.components)
        else:
            if not 0 < max_index <= len(self.components):
                raise IndexError(
                    'Invalid max_index {} for components {}; handle {}'.format(
                        max_index, self.component_names, handle.name))

        # By default, we train every component supervised.
        if not component_weights:
            component_weights = [1] * max_index
        if not unroll_using_oracle:
            unroll_using_oracle = [True] * max_index

        if not max_index <= len(unroll_using_oracle):
            raise IndexError(
                ('Invalid max_index {} for unroll_using_oracle {}; '
                 'handle {}').format(max_index, unroll_using_oracle,
                                     handle.name))

        component_weights = component_weights[:max_index]
        total_weight = float(sum(component_weights))
        component_weights = [w / total_weight for w in component_weights]

        unroll_using_oracle = unroll_using_oracle[:max_index]

        logging.info('Creating training target:')
        logging.info('\tWeights: %s', component_weights)
        logging.info('\tOracle: %s', unroll_using_oracle)

        metrics_list = []
        cost = tf.constant(0.)
        effective_batch = tf.constant(0)

        avg_ops = []
        params_to_train = []

        network_states = {}
        for component_index in range(0, max_index):
            comp = self.components[component_index]
            network_states[comp.name] = component.NetworkState()

            logging.info('Initializing data for component "%s"', comp.name)
            handle = dragnn_ops.init_component_data(
                handle, beam_size=comp.training_beam_size, component=comp.name)
            # TODO(googleuser): Phase out component.MasterState.
            master_state = component.MasterState(
                handle, dragnn_ops.batch_size(handle, component=comp.name))
            with tf.control_dependencies([handle, cost]):
                args = (master_state, network_states)
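                # When unrolling with the oracle, pick structured training for
                # beam search (beam size > 1) and greedy training otherwise.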
                if unroll_using_oracle[component_index]:
                    handle, component_cost, component_correct, component_total = (
                        tf.cond(comp.training_beam_size > 1,
                                lambda: comp.build_structured_training(*args),
                                lambda: comp.build_greedy_training(*args)))
                else:
                    handle = comp.build_greedy_inference(*args,
                                                         during_training=True)
                    component_cost = tf.constant(0.)
                    component_correct = tf.constant(0)
                    component_total = tf.constant(0)

                weighted_component_cost = tf.multiply(
                    component_cost,
                    tf.constant(float(component_weights[component_index])),
                    name='weighted_component_cost')

                cost += weighted_component_cost
                effective_batch += component_total
                metrics_list += [[component_total], [component_correct]]

                if advance_counters:
                    with tf.control_dependencies(
                        [comp.advance_counters(component_total)]):
                        cost = tf.identity(cost)

                # Keep track of which parameters will be trained, and any moving
                # average updates to apply for these parameters.
                params_to_train += comp.network.params
                if self.hyperparams.use_moving_average:
                    avg_ops += comp.avg_ops

        # Concatenate evaluation results.
        metrics = tf.concat(metrics_list, 0)

        # If gradient computation is requested, then:
        # 1. compute the gradients,
        # 2. add an optimizer to update the parameters using the gradients,
        # 3. make the ComputeSession handle depend on the optimizer.
        gradient_norm = tf.constant(0.)
        if compute_gradients:
            logging.info('Creating train op with %d variables:\n\t%s',
                         len(params_to_train),
                         '\n\t'.join([x.name for x in params_to_train]))

            grads_and_vars = self.optimizer.compute_gradients(
                cost, var_list=params_to_train)
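            # Clip each gradient individually, then report the global norm of
            # the clipped gradients for monitoring.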
            clipped_gradients = [(self._clip_gradients(g), v)
                                 for g, v in grads_and_vars]
            gradient_norm = tf.global_norm(list(zip(*clipped_gradients))[0])

            minimize_op = self.optimizer.apply_gradients(
                clipped_gradients, global_step=self.master_vars['step'])

            if self.hyperparams.use_moving_average:
                with tf.control_dependencies([minimize_op]):
                    minimize_op = tf.group(*avg_ops)

            # Make sure all the side-effectful minimizations ops finish before
            # proceeding.
            with tf.control_dependencies([minimize_op]):
                handle = tf.identity(handle)

        # Reset the flag so that subsequent builds don't read from the moving
        # average variables by default.
        self.read_from_avg = False

        cost = tf.check_numerics(cost, message='Cost is not finite.')

        # Returns named access to common outputs.
        outputs = {
            'cost': cost,
            'gradient_norm': gradient_norm,
            'batch': effective_batch,
            'metrics': metrics,
        }
        return handle, outputs
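As a usage illustration, here is a hedged sketch of how this method might be invoked; "builder", the two-component weights, and the trailing comments are illustrative assumptions rather than part of the example above.

    # Hypothetical usage sketch: assumes `builder` is an instance of the
    # class defining build_training, and `handle` is a ComputeSession
    # handle tensor obtained elsewhere.
    handle, outputs = builder.build_training(
        handle,
        compute_gradients=True,            # also build the optimizer op
        advance_counters=True,             # bump per-component step counters
        component_weights=[0.5, 0.5],      # equal weight for two components
        unroll_using_oracle=[True, True],  # train both against the oracle
        max_index=-1)                      # -1 means train all components
    # The returned dictionary exposes 'cost', 'gradient_norm', 'batch', and
    # 'metrics' tensors that can be fetched together with the handle.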
Example #2
    def testCheckIsFalse(self):
        check.IsFalse(1 == 2, 'foo')
        check.IsFalse(False, 'foo')
        check.IsFalse([], 'foo')
        check.IsFalse({}, 'foo')
        check.IsFalse(0, 'foo')
        check.IsFalse(None, 'foo')
        with self.assertRaisesRegexp(ValueError, 'bar'):
            check.IsFalse(True, 'bar')
        with self.assertRaisesRegexp(ValueError, 'bar'):
            check.IsFalse(not None, 'bar')
        with self.assertRaisesRegexp(ValueError, 'bar'):
            check.IsFalse(1, 'bar')
        with self.assertRaisesRegexp(ValueError, 'bar'):
            check.IsFalse([0], 'bar')
        with self.assertRaisesRegexp(ValueError, 'bar'):
            check.IsFalse({'x': 1}, 'bar')
        with self.assertRaisesRegexp(RuntimeError, 'baz'):
            check.IsFalse(' ', 'baz', RuntimeError)
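These assertions pin down the contract of check.IsFalse: stay silent for falsy values, raise ValueError with the given message for truthy ones, and allow the error type to be overridden. A minimal sketch that would satisfy the test (the real check module may differ) is:

    def IsFalse(value, message, error=ValueError):
        # Raise error(message) unless value is falsy. The optional third
        # argument overrides the exception type, matching the RuntimeError
        # case in the last assertion above.
        if value:
            raise error(message)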