def testBatchingSchemeMaxLength(self):
        """max_length behavior: effectively unbounded unless long sequences are dropped."""
        shared = dict(batch_size=20, min_length_bucket=8, length_bucket_step=1.1)

        # No cap and no dropping: scheme reports a huge (unbounded) max_length.
        scheme = data_reader._batching_scheme(
            max_length=None, drop_long_sequences=False, **shared)
        self.assertGreater(scheme["max_length"], 10000)

        # Dropping without an explicit cap: max_length falls back to batch_size.
        scheme = data_reader._batching_scheme(
            max_length=None, drop_long_sequences=True, **shared)
        self.assertEqual(scheme["max_length"], 20)

        # Explicit cap is honored when dropping long sequences.
        scheme = data_reader._batching_scheme(
            max_length=15, drop_long_sequences=True, **shared)
        self.assertEqual(scheme["max_length"], 15)

        # Explicit cap is ignored when long sequences are kept.
        scheme = data_reader._batching_scheme(
            max_length=15, drop_long_sequences=False, **shared)
        self.assertGreater(scheme["max_length"], 10000)
# ----- Example #2 -----
  def testBatchingSchemeMaxLength(self):
    """max_length is unbounded unless drop_long_sequences=True, then capped."""

    def build(max_length, drop):
      # All four cases share the same batch/bucket parameters.
      return data_reader._batching_scheme(
          batch_size=20,
          max_length=max_length,
          min_length_bucket=8,
          length_bucket_step=1.1,
          drop_long_sequences=drop)

    # Keeping long sequences: max_length is effectively infinite.
    self.assertGreater(build(None, False)["max_length"], 10000)
    # Dropping with no explicit cap: defaults to batch_size.
    self.assertEqual(build(None, True)["max_length"], 20)
    # Dropping with an explicit cap: cap is honored.
    self.assertEqual(build(15, True)["max_length"], 15)
    # Explicit cap is ignored when long sequences are kept.
    self.assertGreater(build(15, False)["max_length"], 10000)
# ----- Example #3 -----
  def testBatchingSchemeBuckets(self):
    """Bucket boundaries/batch sizes, and how shard/length multipliers scale them."""
    # Baseline scheme for batch_size=128.
    scheme = data_reader._batching_scheme(batch_size=128)
    boundaries = scheme["boundaries"]
    batch_sizes = scheme["batch_sizes"]
    # One more batch size than boundaries (sizes cover the open-ended last bucket).
    self.assertEqual(len(boundaries), len(batch_sizes) - 1)
    expected_boundaries = [8, 12, 16, 24, 32, 48, 64, 96]
    self.assertEqual(expected_boundaries, boundaries)
    expected_batch_sizes = [16, 10, 8, 5, 4, 2, 2, 1, 1]
    self.assertEqual(expected_batch_sizes, batch_sizes)

    # shard_multiplier scales every batch size; boundaries are untouched.
    scheme = data_reader._batching_scheme(batch_size=128, shard_multiplier=2)
    boundaries = scheme["boundaries"]
    batch_sizes = scheme["batch_sizes"]
    self.assertAllEqual([bs * 2 for bs in expected_batch_sizes], batch_sizes)
    self.assertEqual(expected_boundaries, boundaries)

    # length_multiplier scales boundaries and shrinks batch sizes (floored at 1).
    scheme = data_reader._batching_scheme(batch_size=128, length_multiplier=2)
    boundaries = scheme["boundaries"]
    batch_sizes = scheme["batch_sizes"]
    self.assertAllEqual([b * 2 for b in expected_boundaries], boundaries)
    self.assertEqual([max(1, bs // 2)
                      for bs in expected_batch_sizes], batch_sizes)
# ----- Example #4 -----
    def testBatchingSchemeBuckets(self):
        """Bucket boundaries/batch sizes and the shard/length multiplier scaling."""

        def buckets_for(**overrides):
            # Shared parameters for every call; overrides add the multiplier.
            kwargs = dict(batch_size=128,
                          max_length=0,
                          min_length_bucket=8,
                          length_bucket_step=1.1)
            kwargs.update(overrides)
            scheme = data_reader._batching_scheme(**kwargs)
            return scheme["bucket_boundaries"], scheme["bucket_batch_sizes"]

        boundaries, batch_sizes = buckets_for()
        # One more batch size than boundaries (sizes cover the last open bucket).
        self.assertEqual(len(boundaries), len(batch_sizes) - 1)
        expected_boundaries = [
            8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26, 28,
            30, 33, 36, 39, 42, 46, 50, 55, 60, 66, 72, 79, 86, 94, 103, 113,
            124
        ]
        self.assertEqual(expected_boundaries, boundaries)
        expected_batch_sizes = [
            16, 12, 12, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 3, 3, 3,
            3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1
        ]
        self.assertEqual(expected_batch_sizes, batch_sizes)

        # shard_multiplier doubles every batch size; boundaries are unchanged.
        boundaries, batch_sizes = buckets_for(shard_multiplier=2)
        self.assertAllEqual([bs * 2 for bs in expected_batch_sizes],
                            batch_sizes)
        self.assertEqual(expected_boundaries, boundaries)

        # length_multiplier doubles boundaries and halves batch sizes (min 1).
        boundaries, batch_sizes = buckets_for(length_multiplier=2)
        self.assertAllEqual([b * 2 for b in expected_boundaries], boundaries)
        self.assertEqual([max(1, bs // 2) for bs in expected_batch_sizes],
                         batch_sizes)
# ----- Example #5 -----
  def testBatchingSchemeBuckets(self):
    """Checks bucket boundaries/batch sizes and multiplier scaling behavior."""
    # Parameters shared by all three scheme requests.
    common = dict(
        batch_size=128,
        max_length=0,
        min_length_bucket=8,
        length_bucket_step=1.1)

    scheme = data_reader._batching_scheme(**common)
    boundaries = scheme["boundaries"]
    batch_sizes = scheme["batch_sizes"]
    # batch_sizes has one extra entry for the open-ended final bucket.
    self.assertEqual(len(boundaries), len(batch_sizes) - 1)
    expected_boundaries = [
        8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26, 28, 30,
        33, 36, 39, 42, 46, 50, 55, 60, 66, 72, 79, 86, 94, 103, 113, 124
    ]
    self.assertEqual(expected_boundaries, boundaries)
    expected_batch_sizes = [
        16, 12, 12, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2,
        2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1
    ]
    self.assertEqual(expected_batch_sizes, batch_sizes)

    # shard_multiplier scales only the batch sizes.
    scheme = data_reader._batching_scheme(shard_multiplier=2, **common)
    boundaries = scheme["boundaries"]
    batch_sizes = scheme["batch_sizes"]
    self.assertAllEqual([bs * 2 for bs in expected_batch_sizes], batch_sizes)
    self.assertEqual(expected_boundaries, boundaries)

    # length_multiplier scales boundaries up and batch sizes down (floored at 1).
    scheme = data_reader._batching_scheme(length_multiplier=2, **common)
    boundaries = scheme["boundaries"]
    batch_sizes = scheme["batch_sizes"]
    self.assertAllEqual([b * 2 for b in expected_boundaries], boundaries)
    self.assertEqual([max(1, bs // 2)
                      for bs in expected_batch_sizes], batch_sizes)