def testSequenceConcat(self):
  """Checks `insertion.SequenceConcat` on a batch of four padded sequences.

  Row i of `x` is concatenated with row i of `y`. The expected output packs
  the non-padded ids of `x` followed by those of `y` at the front of the row;
  every remaining position holds the pad id (999, passed as the last
  argument) and is flagged with a padding value of 1.

  The batch covers: equal partial lengths, two full rows, two fully padded
  rows, and two rows of length one.
  """
  src_ids = np.array(
      [[1, 2, 3, 0],
       [1, 2, 3, 4],
       [0, 0, 0, 0],
       [1, 0, 0, 0]], dtype=np.int32)
  src_paddings = np.array(
      [[0, 0, 0, 1],
       [0, 0, 0, 0],
       [1, 1, 1, 1],
       [0, 1, 1, 1]], dtype=np.float32)
  tgt_ids = np.array(
      [[10, 20, 30, 0],
       [10, 20, 30, 40],
       [0, 0, 0, 0],
       [10, 0, 0, 0]], dtype=np.int32)
  tgt_paddings = np.array(
      [[0, 0, 0, 1],
       [0, 0, 0, 0],
       [1, 1, 1, 1],
       [0, 1, 1, 1]], dtype=np.float32)

  with self.session():
    # Convert the inputs in the same order they are handed to the op.
    x_t = tf.convert_to_tensor(src_ids)
    x_pad_t = tf.convert_to_tensor(src_paddings)
    y_t = tf.convert_to_tensor(tgt_ids)
    y_pad_t = tf.convert_to_tensor(tgt_paddings)
    concat_ids, concat_paddings = insertion.SequenceConcat(
        x_t, x_pad_t, y_t, y_pad_t, 999)

    # Materialize both outputs as numpy values for comparison.
    fetches = [
        tf.convert_to_tensor(concat_ids),
        tf.convert_to_tensor(concat_paddings),
    ]
    actual_ids, actual_paddings = self.evaluate(fetches)

    # Expected ids: `x` then `y`, left-packed, with 999 in padded slots.
    expected_ids = np.array(
        [[1, 2, 3, 10, 20, 30, 999, 999],
         [1, 2, 3, 4, 10, 20, 30, 40],
         [999, 999, 999, 999, 999, 999, 999, 999],
         [1, 10, 999, 999, 999, 999, 999, 999]], dtype=np.int32)
    # Expected paddings: 0 for every real token, 1 for every padded slot.
    expected_paddings = np.array(
        [[0, 0, 0, 0, 0, 0, 1, 1],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [1, 1, 1, 1, 1, 1, 1, 1],
         [0, 0, 1, 1, 1, 1, 1, 1]], dtype=np.float32)

    self.assertAllEqual(actual_ids, expected_ids)
    self.assertAllEqual(actual_paddings, expected_paddings)
def _CreateCanvasAndTargets(self, batch):
  # pyformat: disable
  """Builds the joint src+tgt canvas and its insertion targets.

  The source and target sequences are each sampled into a canvas via
  `self._SampleCanvasAndTargets`, then concatenated along the sequence axis
  into a single canvas. Target indices coming from the tgt side have their
  slot column shifted by the per-example source canvas length so that they
  address positions in the concatenated canvas.

  Args:
    batch: A `.NestedMap`.
      - src: A `.NestedMap`.
        - ids: The source ids, ends in <eos>.
        - paddings: The source paddings.
      - tgt: A `.NestedMap`.
        - ids: The target ids, ends in <eos>.
        - paddings: The target paddings.

  Returns:
    A `NestedMap` when `self.params.is_eval` is false:
      - canvas: The canvas (based off of the `rollin_policy`) of shape
        [batch_size, c_dim].
      - canvas_paddings: The paddings of `canvas`.
      - target_indices: The target indices (i.e., use these indices to
        tf.gather_nd the log-probs).
      - target_weights: The target weights.
    `None` when `self.params.is_eval` is true.
  """
  # pyformat: enable
  params = self.params
  if params.is_eval:
    # NOTE(review): nothing is constructed in eval mode, so callers get None
    # here -- confirm that this is what eval callers expect.
    return None

  # Sample a canvas (and its targets) independently for each side; src first.
  src = self._SampleCanvasAndTargets(batch.src.ids, batch.src.paddings)
  tgt = self._SampleCanvasAndTargets(batch.tgt.ids, batch.tgt.paddings)

  # Unshare the src/tgt input embeddings by moving the non-padded src canvas
  # ids past the vocab range. Only canvas ids are offset, not the vocab ids
  # in the targets, so the softmax stays shared. This is a GPU/TPU memory
  # trade-off; empirically, unsharing everything performs better.
  num_classes = params.decoder.softmax.num_classes
  src_is_token = tf.equal(src.canvas_paddings, 0)
  src.canvas = tf.where(src_is_token, src.canvas + num_classes, src.canvas)

  # One-hot membership matrix of shape [num_targets, batch_size]: row t has a
  # single 1 in the column of the example that tgt target t belongs to
  # (column 0 of `target_indices` is the batch index).
  num_examples = py_utils.GetShape(batch.src.ids)[0]
  example_row = tf.expand_dims(tf.range(num_examples), 0)
  target_example_col = tf.expand_dims(tgt.target_indices[:, 0], 1)
  target_batch = tf.cast(tf.equal(example_row, target_example_col), tf.int32)

  # Number of non-padded positions in each example's src canvas.
  src_lens = tf.cast(tf.reduce_sum(1 - src.canvas_paddings, 1), tf.int32)

  # [num_targets, batch_size] x [batch_size, 1] -> [num_targets, 1]: picks
  # out, for every tgt target, the src length of its own example.
  tgt_offset = tf.matmul(target_batch, tf.expand_dims(src_lens, 1))

  # Add the offset to the middle (slot) column only; the batch and vocab
  # columns receive zeros and are left untouched.
  slot_shift = tf.concat(
      [tf.zeros_like(tgt_offset), tgt_offset, tf.zeros_like(tgt_offset)], 1)
  tgt.target_indices += slot_shift

  # The joint canvas is the sequence-level concatenation of src and tgt.
  joint_canvas, joint_paddings = insertion.SequenceConcat(
      src.canvas, src.canvas_paddings, tgt.canvas, tgt.canvas_paddings)

  # Targets from both sides are stacked along the first axis, src first.
  joint_indices = tf.concat([src.target_indices, tgt.target_indices], 0)
  joint_weights = tf.concat([src.target_weights, tgt.target_weights], 0)

  return py_utils.NestedMap(
      canvas=joint_canvas,
      canvas_paddings=joint_paddings,
      target_indices=joint_indices,
      target_weights=joint_weights)