Example #1
0
        def body(handle, *arrays):
            """Runs the network and advances the state by a step.

            Inference loop body (presumably for a tf.while_loop -- the call
            site is not visible here; confirm at the caller). Feeds the current
            state through the network, records layer outputs into the
            TensorArrays, then advances the transition state: deterministically
            via the oracle when only one action exists, otherwise from the
            network's predicted logits.

            Args:
              handle: Tensor handle identifying the master state to advance.
              *arrays: TensorArrays accumulating per-step network outputs.

            Returns:
              A list [new_handle] + updated TensorArrays, matching the input
              loop-variable structure.
            """

            # Serialize on the handle and array flows so graph-mode ops run in
            # the intended order.
            with tf.control_dependencies([handle] + [x.flow for x in arrays]):
                # Get a copy of the network inside this while loop.
                updated_state = MasterState(handle, state.current_batch_size)
                network_tensors = self._feedforward_unit(
                    updated_state,
                    arrays,
                    network_states,
                    stride,
                    during_training=during_training)
                # Append this step's outputs to the accumulating TensorArrays.
                next_arrays = update_tensor_arrays(network_tensors, arrays)
                # Only advance the state after the writes have been flushed.
                with tf.control_dependencies([x.flow for x in next_arrays]):
                    if self.num_actions == 1:  # deterministic; take oracle transition
                        handle = dragnn_ops.advance_from_oracle(
                            handle, component=self.name)
                    else:  # predict next transition using network logits
                        logits = self.network.get_logits(network_tensors)
                        # locally_normalize is a tensor, so the normalization
                        # choice is made in-graph via tf.cond.
                        logits = tf.cond(self.locally_normalize,
                                         lambda: tf.nn.log_softmax(logits),
                                         lambda: logits)
                        handle = dragnn_ops.advance_from_prediction(
                            handle, logits, component=self.name)
                return [handle] + next_arrays
Example #2
0
        def body(handle, cost, correct, total, *arrays):
            """Runs the network and advances the state by a step.

            Training loop body (presumably for a tf.while_loop -- confirm at
            the caller). Feeds the current state through the network,
            accumulates softmax cross-entropy loss over the valid oracle
            actions, then advances the transition state via the oracle.

            Args:
              handle: Tensor handle identifying the master state to advance.
              cost: Tensor accumulating the training loss across steps.
              correct: Tensor accumulating the count of correct predictions.
              total: Tensor accumulating the count of scored predictions.
              *arrays: TensorArrays accumulating per-step network outputs.

            Returns:
              A list [new_handle, cost, correct, total] + updated
              TensorArrays, matching the input loop-variable structure.
            """

            # Serialize on the loop variables and array flows so graph-mode
            # ops run in the intended order.
            with tf.control_dependencies([handle, cost, correct, total] +
                                         [x.flow for x in arrays]):
                # Get a copy of the network inside this while loop.
                updated_state = MasterState(handle, state.current_batch_size)
                network_tensors = self._feedforward_unit(updated_state,
                                                         arrays,
                                                         network_states,
                                                         stride,
                                                         during_training=True)

                # Every layer is written to a TensorArray, so that it can be backprop'd.
                next_arrays = update_tensor_arrays(network_tensors, arrays)
                with tf.control_dependencies([x.flow for x in next_arrays]):
                    with tf.name_scope('compute_loss'):
                        # A gold label > -1 determines that the sentence is still
                        # in a valid state. Otherwise, the sentence has ended.
                        #
                        # We add only the valid sentences to the loss, in the following way:
                        #   1. We compute 'valid_ix', the indices in gold that contain
                        #      valid oracle actions.
                        #   2. We compute the cost function by comparing logits and gold
                        #      only for the valid indices.
                        gold = dragnn_ops.emit_oracle_labels(
                            handle, component=self.name)
                        gold.set_shape([None])
                        valid = tf.greater(gold, -1)
                        valid_ix = tf.reshape(tf.where(valid), [-1])
                        gold = tf.gather(gold, valid_ix)

                        logits = self.network.get_logits(network_tensors)
                        logits = tf.gather(logits, valid_ix)

                        cost += tf.reduce_sum(
                            tf.nn.sparse_softmax_cross_entropy_with_logits(
                                labels=tf.cast(gold, tf.int64), logits=logits))

                        # Optional self-normalization penalty on the log
                        # partition function log(Z) of the logits.
                        if (self.eligible_for_self_norm and
                                self.master.hyperparams.self_norm_alpha > 0):
                            log_z = tf.reduce_logsumexp(logits, [1])
                            cost += (self.master.hyperparams.self_norm_alpha *
                                     tf.nn.l2_loss(log_z))

                        correct += tf.reduce_sum(
                            tf.to_int32(tf.nn.in_top_k(logits, gold, 1)))
                        total += tf.size(gold)

                # Only advance once the loss tensors have been computed.
                with tf.control_dependencies([cost, correct, total, gold]):
                    handle = dragnn_ops.advance_from_oracle(
                        handle, component=self.name)
                return [handle, cost, correct, total] + next_arrays
Example #3
0
        def body(handle, cost, correct, total, *arrays):
            """Runs the network and advances the state by a step.

            Training loop body (presumably for a tf.while_loop -- confirm at
            the caller). Feeds the current state through the network,
            accumulates the loss selected by the 'loss_function' attribute,
            then advances the transition state via the oracle.

            Args:
              handle: Tensor handle identifying the master state to advance.
              cost: Tensor accumulating the training loss across steps.
              correct: Tensor accumulating the count of correct predictions.
              total: Tensor accumulating the count of scored predictions.
              *arrays: TensorArrays accumulating per-step network outputs.

            Returns:
              A list [new_handle, cost, correct, total] + updated
              TensorArrays, matching the input loop-variable structure.

            Raises:
              RuntimeError: If the 'loss_function' attribute names an
                unsupported loss.
            """

            # Serialize on the loop variables and array flows so graph-mode
            # ops run in the intended order.
            with tf.control_dependencies([handle, cost, correct, total] +
                                         [x.flow for x in arrays]):
                # Get a copy of the network inside this while loop.
                updated_state = MasterState(handle, state.current_batch_size)
                network_tensors = self._feedforward_unit(updated_state,
                                                         arrays,
                                                         network_states,
                                                         stride,
                                                         during_training=True)

                # Every layer is written to a TensorArray, so that it can be backprop'd.
                next_arrays = update_tensor_arrays(network_tensors, arrays)
                loss_function = self.attr('loss_function')
                with tf.control_dependencies([x.flow for x in next_arrays]):
                    with tf.name_scope('compute_loss'):
                        logits = self.network.get_logits(network_tensors)
                        if loss_function == 'softmax_cross_entropy':
                            gold = dragnn_ops.emit_oracle_labels(
                                handle, component=self.name)
                            new_cost, new_correct, new_total, valid_logits, valid_gold = (
                                build_softmax_cross_entropy_loss(logits, gold))

                            # Optional self-normalization penalty on the log
                            # partition function log(Z) of the valid logits.
                            if (self.eligible_for_self_norm
                                    and self.master.hyperparams.self_norm_alpha
                                    > 0):
                                log_z = tf.reduce_logsumexp(valid_logits, [1])
                                new_cost += (
                                    self.master.hyperparams.self_norm_alpha *
                                    tf.nn.l2_loss(log_z))
                        elif loss_function == 'sigmoid_cross_entropy':
                            indices, gold, probs = (
                                dragnn_ops.
                                emit_oracle_labels_and_probabilities(
                                    handle, component=self.name))
                            new_cost, new_correct, new_total, valid_gold = (
                                build_sigmoid_cross_entropy_loss(
                                    logits, gold, indices, probs))
                        else:
                            # Bug fix: the exception was previously constructed
                            # but never raised, silently falling through with
                            # new_cost/new_correct/new_total/valid_gold
                            # undefined (a NameError further down instead of a
                            # clear error here).
                            raise RuntimeError("Unknown loss function '%s'" %
                                               loss_function)

                        cost += new_cost
                        correct += new_correct
                        total += new_total

                # Only advance once the loss tensors have been computed.
                with tf.control_dependencies(
                    [cost, correct, total, valid_gold]):
                    handle = dragnn_ops.advance_from_oracle(
                        handle, component=self.name)
                return [handle, cost, correct, total] + next_arrays
Example #4
0
    def body(handle, cost, correct, total, *arrays):
      """Runs the network and advances the state by a step.

      Training loop body (presumably for a tf.while_loop -- confirm at the
      caller). Feeds the current state through the network, accumulates
      softmax cross-entropy loss over the valid oracle actions, then advances
      the transition state via the oracle.

      Args:
        handle: Tensor handle identifying the master state to advance.
        cost: Tensor accumulating the training loss across steps.
        correct: Tensor accumulating the count of correct predictions.
        total: Tensor accumulating the count of scored predictions.
        *arrays: TensorArrays accumulating per-step network outputs.

      Returns:
        A list [new_handle, cost, correct, total] + updated TensorArrays,
        matching the input loop-variable structure.
      """

      # Serialize on the loop variables and array flows so graph-mode ops run
      # in the intended order.
      with tf.control_dependencies([handle, cost, correct, total] +
                                   [x.flow for x in arrays]):
        # Get a copy of the network inside this while loop.
        updated_state = MasterState(handle, state.current_batch_size)
        network_tensors = self._feedforward_unit(
            updated_state, arrays, network_states, stride, during_training=True)

        # Every layer is written to a TensorArray, so that it can be backprop'd.
        next_arrays = update_tensor_arrays(network_tensors, arrays)
        with tf.control_dependencies([x.flow for x in next_arrays]):
          with tf.name_scope('compute_loss'):
            # A gold label > -1 determines that the sentence is still
            # in a valid state. Otherwise, the sentence has ended.
            #
            # We add only the valid sentences to the loss, in the following way:
            #   1. We compute 'valid_ix', the indices in gold that contain
            #      valid oracle actions.
            #   2. We compute the cost function by comparing logits and gold
            #      only for the valid indices.
            gold = dragnn_ops.emit_oracle_labels(handle, component=self.name)
            gold.set_shape([None])
            valid = tf.greater(gold, -1)
            valid_ix = tf.reshape(tf.where(valid), [-1])
            gold = tf.gather(gold, valid_ix)

            logits = self.network.get_logits(network_tensors)
            logits = tf.gather(logits, valid_ix)

            cost += tf.reduce_sum(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=tf.cast(gold, tf.int64), logits=logits))

            # Optional self-normalization penalty on the log partition
            # function log(Z) of the logits.
            if (self.eligible_for_self_norm and
                self.master.hyperparams.self_norm_alpha > 0):
              log_z = tf.reduce_logsumexp(logits, [1])
              cost += (self.master.hyperparams.self_norm_alpha *
                       tf.nn.l2_loss(log_z))

            correct += tf.reduce_sum(
                tf.to_int32(tf.nn.in_top_k(logits, gold, 1)))
            total += tf.size(gold)

        # Only advance once the loss tensors have been computed.
        with tf.control_dependencies([cost, correct, total, gold]):
          handle = dragnn_ops.advance_from_oracle(handle, component=self.name)
        return [handle, cost, correct, total] + next_arrays
Example #5
0
    def body(handle, *arrays):
      """Runs the network and advances the state by a step.

      Inference loop body (presumably for a tf.while_loop -- the call site is
      not visible here; confirm at the caller). Feeds the current state
      through the network, records layer outputs into the TensorArrays, then
      advances the transition state: deterministically via the oracle when
      only one action exists, otherwise from the network's predicted logits.

      Args:
        handle: Tensor handle identifying the master state to advance.
        *arrays: TensorArrays accumulating per-step network outputs.

      Returns:
        A list [new_handle] + updated TensorArrays, matching the input
        loop-variable structure.
      """

      # Serialize on the handle and array flows so graph-mode ops run in the
      # intended order.
      with tf.control_dependencies([handle] + [x.flow for x in arrays]):
        # Get a copy of the network inside this while loop.
        updated_state = MasterState(handle, state.current_batch_size)
        network_tensors = self._feedforward_unit(
            updated_state,
            arrays,
            network_states,
            stride,
            during_training=during_training)
        # Append this step's outputs to the accumulating TensorArrays.
        next_arrays = update_tensor_arrays(network_tensors, arrays)
        # Only advance the state after the writes have been flushed.
        with tf.control_dependencies([x.flow for x in next_arrays]):
          if self.num_actions == 1:  # deterministic; take oracle transition
            handle = dragnn_ops.advance_from_oracle(handle, component=self.name)
          else:  # predict next transition using network logits
            logits = self.network.get_logits(network_tensors)
            # locally_normalize is a tensor, so the normalization choice is
            # made in-graph via tf.cond.
            logits = tf.cond(self.locally_normalize,
                             lambda: tf.nn.log_softmax(logits), lambda: logits)
            handle = dragnn_ops.advance_from_prediction(
                handle, logits, component=self.name)
        return [handle] + next_arrays