def body(handle, *arrays):
  """Advances the master state by one transition step (inference).

  Runs a feed-forward pass inside the while loop, records the layer
  outputs into the tensor arrays, then advances the underlying state
  either deterministically via the oracle or from network predictions.
  """
  array_flows = [x.flow for x in arrays]
  with tf.control_dependencies([handle] + array_flows):
    # Re-wrap the handle so the network is evaluated inside this loop body.
    step_state = MasterState(handle, state.current_batch_size)
    layer_tensors = self._feedforward_unit(
        step_state,
        arrays,
        network_states,
        stride,
        during_training=during_training)
    advanced_arrays = update_tensor_arrays(layer_tensors, arrays)
  with tf.control_dependencies([x.flow for x in advanced_arrays]):
    if self.num_actions == 1:
      # A single action makes the transition deterministic; the oracle can
      # advance the state without consulting the network.
      handle = dragnn_ops.advance_from_oracle(handle, component=self.name)
    else:
      # Score transitions with the network; optionally locally normalize
      # the logits before advancing from the prediction.
      logits = self.network.get_logits(layer_tensors)
      logits = tf.cond(self.locally_normalize,
                       lambda: tf.nn.log_softmax(logits),
                       lambda: logits)
      handle = dragnn_ops.advance_from_prediction(
          handle, logits, component=self.name)
  return [handle] + advanced_arrays
def body(handle, cost, correct, total, *arrays):
  """Runs one network step and accumulates the training loss.

  Executes a feed-forward pass inside the while loop, adds cross-entropy
  loss over the sentences that still have a valid oracle action, updates
  the correct/total counters, and advances the state via the oracle.
  """
  step_deps = [handle, cost, correct, total] + [x.flow for x in arrays]
  with tf.control_dependencies(step_deps):
    # Re-wrap the handle so the network is evaluated inside this loop body.
    step_state = MasterState(handle, state.current_batch_size)
    layer_tensors = self._feedforward_unit(
        step_state, arrays, network_states, stride, during_training=True)
    # Persist every layer into its TensorArray so it can be backprop'd.
    advanced_arrays = update_tensor_arrays(layer_tensors, arrays)
    with tf.control_dependencies([x.flow for x in advanced_arrays]):
      with tf.name_scope('compute_loss'):
        # An oracle label > -1 means the sentence is still in a valid
        # state; finished sentences emit -1 and must not contribute to
        # the loss. Keep only the valid rows of gold and logits.
        gold = dragnn_ops.emit_oracle_labels(handle, component=self.name)
        gold.set_shape([None])
        valid_indices = tf.reshape(tf.where(tf.greater(gold, -1)), [-1])
        gold = tf.gather(gold, valid_indices)

        logits = self.network.get_logits(layer_tensors)
        logits = tf.gather(logits, valid_indices)
        cost += tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.cast(gold, tf.int64), logits=logits))
        if (self.eligible_for_self_norm and
            self.master.hyperparams.self_norm_alpha > 0):
          # Self-normalization penalty: push log(Z) towards zero.
          log_z = tf.reduce_logsumexp(logits, [1])
          cost += (self.master.hyperparams.self_norm_alpha *
                   tf.nn.l2_loss(log_z))
        correct += tf.reduce_sum(
            tf.to_int32(tf.nn.in_top_k(logits, gold, 1)))
        total += tf.size(gold)

      # Force the loss computation to happen before the state advances.
      with tf.control_dependencies([cost, correct, total, gold]):
        handle = dragnn_ops.advance_from_oracle(handle, component=self.name)
  return [handle, cost, correct, total] + advanced_arrays
def body(handle, cost, correct, total, *arrays):
  """Runs the network and advances the state by a step.

  While-loop body for training: performs a feed-forward pass, computes
  the configured loss ('softmax_cross_entropy' or
  'sigmoid_cross_entropy'), accumulates cost/correct/total, and advances
  the state via the oracle.

  Raises:
    RuntimeError: If self.attr('loss_function') names an unknown loss.
  """
  with tf.control_dependencies([handle, cost, correct, total] +
                               [x.flow for x in arrays]):
    # Get a copy of the network inside this while loop.
    updated_state = MasterState(handle, state.current_batch_size)
    network_tensors = self._feedforward_unit(
        updated_state, arrays, network_states, stride, during_training=True)
    # Every layer is written to a TensorArray, so that it can be backprop'd.
    next_arrays = update_tensor_arrays(network_tensors, arrays)
    loss_function = self.attr('loss_function')
    with tf.control_dependencies([x.flow for x in next_arrays]):
      with tf.name_scope('compute_loss'):
        logits = self.network.get_logits(network_tensors)
        if loss_function == 'softmax_cross_entropy':
          gold = dragnn_ops.emit_oracle_labels(handle, component=self.name)
          new_cost, new_correct, new_total, valid_logits, valid_gold = (
              build_softmax_cross_entropy_loss(logits, gold))
          if (self.eligible_for_self_norm and
              self.master.hyperparams.self_norm_alpha > 0):
            # Self-normalization penalty: push log(Z) towards zero.
            log_z = tf.reduce_logsumexp(valid_logits, [1])
            new_cost += (self.master.hyperparams.self_norm_alpha *
                         tf.nn.l2_loss(log_z))
        elif loss_function == 'sigmoid_cross_entropy':
          indices, gold, probs = (
              dragnn_ops.emit_oracle_labels_and_probabilities(
                  handle, component=self.name))
          new_cost, new_correct, new_total, valid_gold = (
              build_sigmoid_cross_entropy_loss(logits, gold, indices, probs))
        else:
          # BUG FIX: the exception was previously constructed but never
          # raised, so an unknown loss function fell through to an
          # UnboundLocalError on new_cost/valid_gold below.
          raise RuntimeError("Unknown loss function '%s'" % loss_function)
        cost += new_cost
        correct += new_correct
        total += new_total
      # Force the loss computation to happen before the state advances.
      with tf.control_dependencies([cost, correct, total, valid_gold]):
        handle = dragnn_ops.advance_from_oracle(handle, component=self.name)
  return [handle, cost, correct, total] + next_arrays
def body(handle, cost, correct, total, *arrays):
  """Runs the network and advances the state by a step.

  While-loop body for training: performs one feed-forward pass, adds the
  softmax cross-entropy loss over still-valid sentences to `cost`, updates
  the `correct`/`total` counters, and advances the state via the oracle.
  `arrays` are the per-layer TensorArrays threaded through the loop.
  """
  with tf.control_dependencies([handle, cost, correct, total] +
                               [x.flow for x in arrays]):
    # Get a copy of the network inside this while loop.
    updated_state = MasterState(handle, state.current_batch_size)
    network_tensors = self._feedforward_unit(
        updated_state, arrays, network_states, stride, during_training=True)
    # Every layer is written to a TensorArray, so that it can be backprop'd.
    next_arrays = update_tensor_arrays(network_tensors, arrays)
    with tf.control_dependencies([x.flow for x in next_arrays]):
      with tf.name_scope('compute_loss'):
        # A gold label > -1 determines that the sentence is still
        # in a valid state. Otherwise, the sentence has ended.
        #
        # We add only the valid sentences to the loss, in the following way:
        #   1. We compute 'valid_ix', the indices in gold that contain
        #      valid oracle actions.
        #   2. We compute the cost function by comparing logits and gold
        #      only for the valid indices.
        gold = dragnn_ops.emit_oracle_labels(handle, component=self.name)
        gold.set_shape([None])
        valid = tf.greater(gold, -1)
        valid_ix = tf.reshape(tf.where(valid), [-1])
        gold = tf.gather(gold, valid_ix)

        logits = self.network.get_logits(network_tensors)
        logits = tf.gather(logits, valid_ix)
        cost += tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.cast(gold, tf.int64), logits=logits))
        if (self.eligible_for_self_norm and
            self.master.hyperparams.self_norm_alpha > 0):
          # Self-normalization penalty: penalize log(Z) so the logits are
          # approximately normalized without an explicit softmax.
          log_z = tf.reduce_logsumexp(logits, [1])
          cost += (self.master.hyperparams.self_norm_alpha *
                   tf.nn.l2_loss(log_z))
        correct += tf.reduce_sum(
            tf.to_int32(tf.nn.in_top_k(logits, gold, 1)))
        total += tf.size(gold)

      # Force the loss computation to run before the state advances.
      with tf.control_dependencies([cost, correct, total, gold]):
        handle = dragnn_ops.advance_from_oracle(handle, component=self.name)
  return [handle, cost, correct, total] + next_arrays
def body(handle, *arrays):
  """Runs the network and advances the state by a step.

  While-loop body for inference: performs one feed-forward pass, records
  the layer outputs into the tensor arrays, then advances the state —
  via the oracle when there is only one possible action, otherwise from
  the network's (optionally locally normalized) logits.
  """
  with tf.control_dependencies([handle] + [x.flow for x in arrays]):
    # Get a copy of the network inside this while loop.
    updated_state = MasterState(handle, state.current_batch_size)
    network_tensors = self._feedforward_unit(
        updated_state,
        arrays,
        network_states,
        stride,
        during_training=during_training)
    next_arrays = update_tensor_arrays(network_tensors, arrays)
  with tf.control_dependencies([x.flow for x in next_arrays]):
    if self.num_actions == 1:
      # Only one action: the transition is deterministic, so the oracle
      # advances the state without consulting the network.
      handle = dragnn_ops.advance_from_oracle(handle, component=self.name)
    else:
      # Predict the next transition using the network logits.
      logits = self.network.get_logits(network_tensors)
      logits = tf.cond(self.locally_normalize,
                       lambda: tf.nn.log_softmax(logits),
                       lambda: logits)
      handle = dragnn_ops.advance_from_prediction(
          handle, logits, component=self.name)
  return [handle] + next_arrays