Ejemplo n.º 1
0
def test_popitems():
    """Check the pairs produced by ``tools.popitems()`` and that the source
    dictionary is empty once iteration has finished."""

    source = dict((num, str(num)) for num in range(10))

    for key, value in tools.popitems(source):
        assert key < 10
        assert str(key) == value

    # Nothing may be left behind in the dictionary after iterating.
    assert len(source) == 0
Ejemplo n.º 2
0
    def __call__(self, stream):
        """Run the map, partition, and reduce phases over ``stream``.

        Parameters
        ----------
        stream : iterable
            Items handed to the map phase.

        Returns
        -------
        object
            Whatever ``self.output()`` returns when given
            ``tools.popitems()`` applied to the reduce output grouped by key.

        Raises
        ------
        errors.ClosedTaskError
            If ``self.closed`` is true.
        errors.KeyCountError
            If the first item out of the map phase does not hold
            ``n_partition_keys + n_sort_keys + 1`` elements, or the first
            item out of the reduce phase does not hold exactly 2.
        StopIteration
            If either phase produces no items at all, because the first
            item of each phase is fetched with a bare ``next()``.
        """

        if self.closed:
            raise errors.ClosedTaskError("Task is closed.")

        # ---- Map phase ----
        self.init_map()

        if self.map_jobs != 1:
            map_output = self._map_job_pool.imap_unordered(
                self._run_map, stream, self.map_chunksize)
        else:
            # A single job runs in-process: no pool overhead and much
            # simpler to debug.
            map_output = _compat.map(self.mapper, stream)
        map_output = it.chain.from_iterable(map_output)

        # Peek at the first mapped item and validate it up front.  Failing
        # here gives a clear error instead of a confusing one from inside a
        # worker pool.  The item is pushed back so nothing is lost.
        head = next(map_output)
        map_output = it.chain([head], map_output)
        n_expected = self.n_partition_keys + self.n_sort_keys + 1
        n_actual = len(head)
        if n_actual != n_expected:
            template = (
                "Expected {expected} keys from the map phase, not {actual} - "
                "first keys: {keys}")
            raise errors.KeyCountError(
                template.format(
                    expected=n_expected, actual=n_actual, keys=head))
        self.check_map_keys(head)

        # ---- Partition (and optionally prepare for sorting) ----
        groups = defaultdict(deque)
        keyed = _compat.map(self._map_key_grouper, map_output)

        if self.n_sort_keys != 0:
            # Sort keys are present: keep them alongside each value.
            for partition, sort_key, value in keyed:
                groups[partition].append((sort_key, value))
            if self.n_partition_keys > 1:
                # Hand only the first element of the partition key to the
                # reduce phase.
                reduce_input = (
                    (partition[0], pairs)
                    for partition, pairs in groups.items())
            else:
                reduce_input = groups.items()
        else:
            # No sort keys: values are stored as-is.
            for partition, value in keyed:
                groups[partition].append(value)
            reduce_input = groups.items()

        # ---- Reduce phase ----
        self.init_reduce()
        if self.reduce_jobs != 1:
            reduce_output = self._reduce_job_pool.imap_unordered(
                self._run_reduce, reduce_input, self.reduce_chunksize)
        else:
            reduce_output = _compat.map(self._run_reduce, reduce_input)
        reduce_output = it.chain.from_iterable(reduce_output)

        # Same early validation as the map phase: inspect the first reduced
        # item, then put it back at the front of the iterator.
        head = next(reduce_output)
        reduce_output = it.chain([head], reduce_output)
        if len(head) != 2:
            raise errors.KeyCountError(
                "Expected 2 keys from the reduce phase, not {} - first "
                "keys: {}".format(len(head), head))
        self.check_reduce_keys(head)

        # Group the reduced values by key before handing them to output().
        reduced = defaultdict(deque)
        for key, value in reduce_output:
            reduced[key].append(value)

        return self.output(tools.popitems(reduced))