コード例 #1
0
ファイル: test_util.py プロジェクト: sagravat/caliban
  def test_chunks_below_limit(self):
    """Check chunks_below_limit on a list shorter and longer than the limit."""
    items = [0, 1, 2, 3, 4, 5]

    # With a limit larger than the list, the input comes back as one chunk.
    unsplit = u.chunks_below_limit(items, 100)
    self.assertListEqual([items], unsplit)

    # With a limit smaller than the list (6 items, limit 5), the items are
    # expected to be dealt out alternately into two shards.
    expected_shards = [[0, 2, 4], [1, 3, 5]]
    sharded = u.chunks_below_limit(items, 5)
    self.assertListEqual(expected_shards, sharded)

    # Because these shards are equal in length, zipping them together and
    # flattening the resulting pairs restores the original ordering.
    interleaved = itertools.chain.from_iterable(zip(*expected_shards))
    self.assertListEqual(items, list(interleaved))
コード例 #2
0
def test_chunks_below_limit():
    """Check chunks_below_limit on a list shorter and longer than the limit."""
    items = [0, 1, 2, 3, 4, 5]

    # With a limit larger than the list, the input comes back as one chunk.
    unsplit = u.chunks_below_limit(items, 100)
    assert [items] == unsplit

    # With a limit smaller than the list (6 items, limit 5), the items are
    # expected to be dealt out alternately into two shards.
    expected_shards = [[0, 2, 4], [1, 3, 5]]
    sharded = u.chunks_below_limit(items, 5)
    assert expected_shards == sharded

    # Because these shards are equal in length, zipping them together and
    # flattening the resulting pairs restores the original ordering.
    interleaved = itertools.chain.from_iterable(zip(*expected_shards))
    assert items == list(interleaved)
コード例 #3
0
def logged_batches(specs: Iterable[ht.JobSpec],
                   limit: int) -> Iterable[Iterable[ht.JobSpec]]:
    """Yield the supplied job specs in batches, logging progress along the way.

    The specs are split with ``u.chunks_below_limit`` so that each batch is
    meant to hold at most ``limit`` items. Batches are not necessarily
    contiguous slices of the input, but together they cover every original
    item.

    This is a generator, so nothing happens until it is iterated. On first
    iteration it:

    - realizes ``specs`` into a list and logs a one-line summary of how many
      batches will be produced for how many jobs;
    - logs a "Batch i of N:" header (1-based) before yielding each batch.

    Each yielded batch is the result of ``logged_specs(chunk)``; any per-item
    logging within a batch is that helper's responsibility.

    Args:
      specs: iterable of job specs; consumed in full up front.
      limit: chunk limit forwarded to ``u.chunks_below_limit``.

    Yields:
      One ``logged_specs``-wrapped batch per chunk.
    """
    # The input may be a one-shot generator; materialize it so it can be
    # counted for the summary message.
    all_specs = list(specs)
    job_count = len(all_specs)

    # Partition the specs into chunks bounded by the supplied limit.
    batches = u.chunks_below_limit(all_specs, limit=limit)
    batch_count = len(batches)

    # Pick singular or plural nouns so the summary reads naturally.
    if batch_count == 1:
        batch_noun = "batch"
    else:
        batch_noun = "batches"

    if job_count == 1:
        job_noun = "job"
    else:
        job_noun = "jobs"

    summary = "Generating {} {} for {} {}.".format(batch_count, batch_noun,
                                                   job_count, job_noun)
    logging.info(summary)

    for batch_number, batch in enumerate(batches, start=1):
        header = "Batch {} of {}:".format(batch_number, batch_count)
        logging.info(header)
        yield logged_specs(batch)