def _eval_predict(self):
    """Create the tensors used at evaluation / prediction time.

    Populates three attributes on ``self``:

    * ``rel_probs``   -- softmax of ``self.logits`` taken over axis 2.
    * ``n_tokens``    -- sum of the length feature, cast to int32.
    * ``predictions`` -- element 0 of the ``crf.crf_decode`` result computed
      from the selected logits, the tag transitions and the int32 sequence
      lengths.
    """
    # Original annotation for the raw logits layout: (b x n x r x n).
    self.rel_probs = tf.nn.softmax(self.logits, axis=2)

    total_length = tf.reduce_sum(self.features[constants.LENGTH_KEY])
    self.n_tokens = tf.cast(total_length, tf.int32)

    # Original annotation for the selected logits layout: (b x n x r).
    selected_logits = select_logits(
        self.logits, self.predicate_indices, self.n_steps)

    int_lengths = tf.cast(self._sequence_lengths, tf.int32)
    decoded = crf.crf_decode(
        selected_logits, self._tag_transitions, int_lengths)
    # Only the first element of the decode result is kept as the prediction.
    self.predictions = decoded[0]
def testCrfDecodeZeroSeqLength(self):
    """Check that crf_decode runs when the sequence lengths are zero.

    All-ones potentials and transitions are decoded with every sequence
    length set to zero; only the rank of each output is verified (tags must
    be rank 2, scores rank 1).
    """
    batch_size, max_steps, num_tags = 2, 10, 5
    with self.test_session() as sess:
        unary_potentials = constant_op.constant(
            np.ones([batch_size, max_steps, num_tags], dtype=np.float32))
        transitions = constant_op.constant(
            np.ones([num_tags, num_tags], dtype=np.float32))
        zero_lengths = constant_op.constant(
            np.zeros([batch_size], dtype=np.int32))

        decoded = crf.crf_decode(unary_potentials, transitions, zero_lengths)
        tag_values, score_values = sess.run(decoded)

        self.assertEqual(tag_values.ndim, 2)
        self.assertEqual(score_values.ndim, 1)
def testCrfDecodeZeroSeqLength(self):
    """Verify crf_decode copes with sequence_length entries equal to zero.

    The decode is run on a batch of two all-ones inputs whose lengths are
    both zero. The test asserts only that the evaluated tags are rank 2 and
    the evaluated scores are rank 1.
    """
    ones_inputs = np.ones([2, 10, 5], dtype=np.float32)
    ones_transitions = np.ones([5, 5], dtype=np.float32)
    zero_lengths = np.zeros([2], dtype=np.int32)

    with self.test_session() as sess:
        decoded_tags, decoded_scores = crf.crf_decode(
            constant_op.constant(ones_inputs),
            constant_op.constant(ones_transitions),
            constant_op.constant(zero_lengths))
        tag_array, score_array = sess.run([decoded_tags, decoded_scores])

        self.assertEqual(np.ndim(tag_array), 2)
        self.assertEqual(np.ndim(score_array), 1)
def testCrfDecode(self):
    """Compare crf_decode against an exhaustive search over tag sequences.

    Every tag assignment for the first ``sequence_lengths`` positions is
    scored with ``crf.crf_sequence_score``; the best one must match the
    sequence and score returned by ``crf.crf_decode``.
    """
    inputs = np.array(
        [[4, 5, -3],
         [3, -1, 3],
         [-1, 2, 1],
         [0, 0, 0]], dtype=np.float32)
    transition_params = np.array(
        [[-3, 5, -2],
         [3, 4, 1],
         [1, 2, 1]], dtype=np.float32)
    sequence_lengths = np.array(3, dtype=np.int32)
    num_words, num_tags = inputs.shape

    with self.test_session() as sess:
        candidate_sequences = []
        candidate_scores = []
        # Compare the dynamic program with brute force computation.
        for prefix in itertools.product(range(num_tags),
                                        repeat=sequence_lengths):
            # Pad with tag 0 beyond the real sequence length.
            candidate = list(prefix)
            candidate.extend([0] * (num_words - sequence_lengths))
            candidate_sequences.append(candidate)

            batched_inputs = array_ops.expand_dims(inputs, 0)
            batched_tags = array_ops.expand_dims(candidate, 0)
            batched_lengths = array_ops.expand_dims(sequence_lengths, 0)
            transitions = constant_op.constant(transition_params)
            batched_score = crf.crf_sequence_score(
                inputs=batched_inputs,
                tag_indices=batched_tags,
                sequence_lengths=batched_lengths,
                transition_params=transitions)
            candidate_scores.append(array_ops.squeeze(batched_score, [0]))

        score_values = sess.run(candidate_scores)
        best_index = np.argmax(score_values)
        expected_max_sequence = candidate_sequences[best_index]
        expected_max_score = score_values[best_index]

        decoded_tags, decoded_score = crf.crf_decode(
            array_ops.expand_dims(inputs, 0),
            constant_op.constant(transition_params),
            array_ops.expand_dims(sequence_lengths, 0))
        decoded_tags = array_ops.squeeze(decoded_tags, [0])
        decoded_score = array_ops.squeeze(decoded_score, [0])
        tags_value, score_value = sess.run([decoded_tags, decoded_score])

        self.assertAllClose(score_value, expected_max_score)
        # Only the unpadded prefix of each sequence is compared.
        self.assertEqual(list(tags_value[:sequence_lengths]),
                         expected_max_sequence[:sequence_lengths])
def testCrfDecode(self):
    """Check crf_decode against brute-force enumeration of tag sequences.

    All tag assignments over the first ``sequence_lengths`` positions are
    enumerated and scored with ``crf.crf_sequence_score``. The arg-max of
    those scores is the expected result for ``crf.crf_decode``.
    """
    inputs = np.array(
        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
    transition_params = np.array(
        [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
    sequence_lengths = np.array(3, dtype=np.int32)
    num_words = inputs.shape[0]
    num_tags = inputs.shape[1]

    with self.test_session() as sess:
        # Compare the dynamic program with brute force computation.
        # Each enumerated prefix is padded with tag 0 up to num_words.
        all_sequences = [
            list(tags) + [0] * (num_words - sequence_lengths)
            for tags in itertools.product(
                range(num_tags), repeat=sequence_lengths)
        ]

        all_sequence_scores = []
        for tag_indices in all_sequences:
            score = crf.crf_sequence_score(
                inputs=array_ops.expand_dims(inputs, 0),
                tag_indices=array_ops.expand_dims(tag_indices, 0),
                sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
                transition_params=constant_op.constant(transition_params))
            all_sequence_scores.append(array_ops.squeeze(score, [0]))

        evaluated_scores = sess.run(all_sequence_scores)
        winner = np.argmax(evaluated_scores)
        expected_max_sequence = all_sequences[winner]
        expected_max_score = evaluated_scores[winner]

        viterbi_tags, viterbi_score = crf.crf_decode(
            array_ops.expand_dims(inputs, 0),
            constant_op.constant(transition_params),
            array_ops.expand_dims(sequence_lengths, 0))
        fetches = [
            array_ops.squeeze(viterbi_tags, [0]),
            array_ops.squeeze(viterbi_score, [0]),
        ]
        tf_actual_max_sequence, tf_actual_max_score = sess.run(fetches)

        self.assertAllClose(tf_actual_max_score, expected_max_score)
        actual_prefix = list(tf_actual_max_sequence[:sequence_lengths])
        expected_prefix = expected_max_sequence[:sequence_lengths]
        self.assertEqual(actual_prefix, expected_prefix)
# Unary (per-word, per-tag) scores for the training data: multiply the
# flattened features by w_t, then reshape to
# [num_train_examples, num_train_words, num_tags].
# NOTE(review): x_t_features, w_t, y_t and the *_sequence_lengths_t tensors are
# defined earlier in the script, outside this fragment.
scores = tf.matmul(x_t_features, w_t, name='energies')
scores = tf.reshape(scores, [num_train_examples, num_train_words, num_tags])

# Compute the log-likelihood of the gold sequences and keep the transition
# params for inference at test time. transition_weights_t is passed in and
# then rebound to the value returned by crf_log_likelihood.
log_likelihood, transition_weights_t = crf_log_likelihood(
    scores, y_t, train_sequence_lengths_t, transition_weights_t)

# Flatten the train and test inputs to rows of width num_features.
# NOTE(review): x_train_t_features is not used in the visible part of the
# script (the training scores above use x_t_features) -- confirm whether it is
# needed further down.
x_train_t_features = tf.reshape(x_t, [-1, num_features], name='X_train_flattened')
x_test_t_features = tf.reshape(x_test_t, [-1, num_features], name='X_test_flattened')

# Unary scores for the test data, using the same weights w_t as training.
test_scores = tf.matmul(x_test_t_features, w_t, name='test_energies')
test_scores = tf.reshape(test_scores, [num_test_examples, num_test_words, num_tags])

# Compute the viterbi sequence and score for both the training and the test
# scores, using the same transition weights.
viterbi_sequence_train, viterbi_train_scores = crf_decode(
    scores, transition_weights_t, train_sequence_lengths_t)
viterbi_sequence, viterbi_score = crf_decode(
    test_scores, transition_weights_t, test_sequence_lengths_t)

# Add a training op to tune the parameters.
# Loss = -C * mean log-likelihood, plus L2 penalties (weight 1e-2) on both the
# feature weights and the transition weights.
loss = -C * tf.reduce_mean(log_likelihood)
loss += 1e-2 * tf.nn.l2_loss(w_t)
# Disabled explicit form of the transition penalty; tf.nn.l2_loss computes
# sum(t ** 2) / 2, so the active line below expresses the same term.
#loss += 1e-2 * 0.5 * tf.reduce_sum(tf.square(transition_weights_t))
loss += 1e-2 * tf.nn.l2_loss(transition_weights_t)

# Step counter (non-trainable) that drives the learning-rate schedule: the
# rate starts at 0.5 and is multiplied by 0.9 every 200 steps (staircase).
global_step = tf.Variable(0, trainable=False, name='global_step')
learning_rate = tf.train.exponential_decay(
    0.5, global_step, decay_rate=0.9, decay_steps=200, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)

# Op that performs one optimisation step; global_step is passed so that
# minimize() increments it. The loop that runs this op is not part of this
# fragment.
train_op = optimizer.minimize(loss, global_step)
from data_loader import load_Q1_data

# Load the features (X), the per-tag weights (W) and the transition matrix
# (Tij), then decode the single sequence and print one letter per position.
X, W, Tij = load_Q1_data()

num_examples = 1  # 1 sample_dataset
num_words, num_features = X.shape[0], X.shape[1]  # 100, 128
num_tags = W.shape[0]  # 26

# Add a leading batch axis of size num_examples (== 1).
X = X.reshape((num_examples, num_words, num_features)).astype(np.float32)

with tf.Session() as sess:
    x_t = tf.constant(X, dtype=tf.float32, name='X')
    w_t = tf.constant(W, dtype=tf.float32, name='W')
    t_t = tf.constant(Tij, dtype=tf.float32, name='T')
    sequence_lengths_t = tf.constant([num_words], dtype=tf.int32)

    # Unary scores: flattened features times W transposed, reshaped back to
    # [num_examples, num_words, num_tags].
    x_t_features = tf.reshape(x_t, [-1, num_features], name='X_flattened')
    scores = tf.matmul(x_t_features, w_t, transpose_b=True, name='energies')
    scores = tf.reshape(scores, [num_examples, num_words, num_tags])

    viterbi_sequence, viterbi_score = crf_decode(
        scores, t_t, sequence_lengths_t)

    # Take the decoded tags of the only example in the batch and map each
    # tag index to an upper-case letter (0 -> 'A').
    decoded_tags = sess.run(viterbi_sequence)[0]
    sequence = [chr(tag + ord("A")) for tag in decoded_tags]

    # Positions are reported 1-based.
    for position, letter in enumerate(sequence, start=1):
        print("i=%d : Predicted = %s" % (position, letter))
def _eval_predict(self):
    """Decode tag predictions and store them on ``self.predictions``.

    Runs ``crf.crf_decode`` on the logits, the tag transitions and the
    int32-cast sequence lengths, keeps element 0 of the result, and passes it
    through ``self._mask_subtokens`` before storing.
    """
    logits, transitions = self.logits, self._tag_transitions
    int_lengths = tf.cast(self._sequence_lengths, tf.int32)

    decoded = crf.crf_decode(logits, transitions, int_lengths)
    decoded_tags = decoded[0]

    # optionally mask intermediate subtokens from prediction results
    self.predictions = self._mask_subtokens(decoded_tags)