Python SequenceConcat 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: lingvo.core.insertion

메소드/함수: SequenceConcat

hotexamples.com에서의 예제들: 2

Python SequenceConcat - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python lingvo.core.insertion.SequenceConcat의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: insertion_test.py 프로젝트: tianweiy/lingvo

  def testSequenceConcat(self):
    """Verifies SequenceConcat on rows of varying unpadded lengths.

    The four rows exercise: a partially padded row, a full row, a fully padded
    row, and a row with a single real token. Both inputs share the same
    paddings pattern. The expected output has width 8 (two width-4 inputs),
    with every padded output position holding the pad id passed to the call.
    """
    pad_id = 999

    # Left-hand sequences and their paddings (1.0 marks a padded position).
    lhs_ids = np.array([
        [1, 2, 3, 0],
        [1, 2, 3, 4],
        [0, 0, 0, 0],
        [1, 0, 0, 0],
    ], dtype=np.int32)
    lhs_paddings = np.array([
        [0, 0, 0, 1],
        [0, 0, 0, 0],
        [1, 1, 1, 1],
        [0, 1, 1, 1],
    ], dtype=np.float32)

    # Right-hand sequences, padded with the same pattern as the left-hand ones.
    rhs_ids = np.array([
        [10, 20, 30, 0],
        [10, 20, 30, 40],
        [0, 0, 0, 0],
        [10, 0, 0, 0],
    ], dtype=np.int32)
    rhs_paddings = np.array([
        [0, 0, 0, 1],
        [0, 0, 0, 0],
        [1, 1, 1, 1],
        [0, 1, 1, 1],
    ], dtype=np.float32)

    # Expected result: per row, the real tokens of the left-hand input followed
    # by the real tokens of the right-hand input, then `pad_id` to the end.
    expected_ids = np.array([
        [1, 2, 3, 10, 20, 30, 999, 999],
        [1, 2, 3, 4, 10, 20, 30, 40],
        [999, 999, 999, 999, 999, 999, 999, 999],
        [1, 10, 999, 999, 999, 999, 999, 999],
    ], dtype=np.int32)
    # Paddings that correspond to `expected_ids`.
    expected_paddings = np.array([
        [0, 0, 0, 0, 0, 0, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1],
        [0, 0, 1, 1, 1, 1, 1, 1],
    ], dtype=np.float32)

    with self.session():
      concat_ids, concat_paddings = insertion.SequenceConcat(
          tf.convert_to_tensor(lhs_ids), tf.convert_to_tensor(lhs_paddings),
          tf.convert_to_tensor(rhs_ids), tf.convert_to_tensor(rhs_paddings),
          pad_id)

      fetches = [
          tf.convert_to_tensor(concat_ids),
          tf.convert_to_tensor(concat_paddings),
      ]
      actual_ids, actual_paddings = self.evaluate(fetches)

      self.assertAllEqual(actual_ids, expected_ids)
      self.assertAllEqual(actual_paddings, expected_paddings)

예제 #2

파일 보기

파일: model.py 프로젝트: wgfi110/lingvo

  def _CreateCanvasAndTargets(self, batch):
    # pyformat: disable
    """Create the canvas and targets.

    When `p.is_eval` is falsy, a canvas/target descriptor is sampled
    independently for the source and for the target, the source canvas ids are
    offset by the softmax vocab size, the target slot indices are shifted by
    the per-example source canvas length, and the two canvases are concatenated
    along the sequence dimension.

    NOTE(review): no return statement is visible in this excerpt for the case
    where `p.is_eval` is truthy; confirm what callers expect in eval mode.

    Args:
      batch: A `.NestedMap`.

        - src: A `.NestedMap`.
          - ids: The source ids, ends in <eos>.
          - paddings: The source paddings.

        - tgt: A `.NestedMap`.
          - ids: The target ids, ends in <eos>.
          - paddings: The target paddings.

    Returns:
      A `NestedMap`.
        - canvas: The canvas (based off of the `rollin_policy`) of shape
          [batch_size, c_dim].
        - canvas_paddings: The paddings of `canvas`.
        - target_indices: The target indices (i.e., use these indices to
          tf.gather_nd the log-probs). Optional, only during training.
        - target_weights: The target weights. Optional, only during training.
    """
    # pyformat: enable
    p = self.params

    if not p.is_eval:
      # Sample our src and tgt canvas.
      src_descriptor = self._SampleCanvasAndTargets(batch.src.ids,
                                                    batch.src.paddings)
      tgt_descriptor = self._SampleCanvasAndTargets(batch.tgt.ids,
                                                    batch.tgt.paddings)

      # Offset the src ids (to unshare embeddings between src/tgt). Note, we
      # only offset the canvas ids, but we do not offset the vocab ids. This
      # will result in unshared embeddings, but shared softmax. This is due to
      # GPU/TPU memory limitations, empirically it is known that unsharing
      # everything results in better performance.
      vocab_size = p.decoder.softmax.num_classes
      # Only positions whose padding equals 0 are shifted by `vocab_size`;
      # padded positions keep their original canvas value.
      src_descriptor.canvas = tf.where(
          tf.equal(src_descriptor.canvas_paddings, 0),
          src_descriptor.canvas + vocab_size, src_descriptor.canvas)

      # Offset the tgt indices (need shift according to src length).
      batch_size = py_utils.GetShape(batch.src.ids)[0]
      # `target_batch` is a [num_targets, batch_size] tensor where each row
      # identifies which batch the target belongs to. Note the observation that,
      # tf.reduce_sum(target_batch, 1) == 1 \forall rows.
      # It is built by comparing column 0 of the target indices against
      # tf.range(batch_size), i.e. column 0 is treated as the batch index.
      target_batch = tf.cast(
          tf.equal(
              tf.expand_dims(tf.range(batch_size), 0),
              tf.expand_dims(tgt_descriptor.target_indices[:, 0], 1)), tf.int32)
      # Per-example count of non-padded src canvas positions.
      src_lens = tf.cast(
          tf.reduce_sum(1 - src_descriptor.canvas_paddings, 1), tf.int32)
      # `tgt_offset` is shape [num_targets, 1] where each entry corresponds to
      # the offset needed for that target (due to the source length). The
      # matmul with the one-hot rows selects each target's own `src_lens` entry.
      tgt_offset = tf.matmul(target_batch, tf.expand_dims(src_lens, 1))
      # We shift the tgt slot without touching the batch or vocab: the offset
      # is placed in the middle column and zeros in the first and last columns,
      # so only column 1 of `target_indices` changes.
      tgt_descriptor.target_indices += tf.concat(
          [tf.zeros_like(tgt_offset), tgt_offset,
           tf.zeros_like(tgt_offset)], 1)

      # The canvas is simply the sequence-level concat of the src and tgt.
      canvas, canvas_paddings = insertion.SequenceConcat(
          src_descriptor.canvas, src_descriptor.canvas_paddings,
          tgt_descriptor.canvas, tgt_descriptor.canvas_paddings)
      # Targets and weights are stacked along axis 0: src entries first, then
      # the (already shifted) tgt entries.
      target_indices = tf.concat(
          [src_descriptor.target_indices, tgt_descriptor.target_indices], 0)
      target_weights = tf.concat(
          [src_descriptor.target_weights, tgt_descriptor.target_weights], 0)

      return py_utils.NestedMap(
          canvas=canvas,
          canvas_paddings=canvas_paddings,
          target_indices=target_indices,
          target_weights=target_weights)