Example #1
    def performanceTable(self):
        if len(self._performanceTables) == 0:
            return PerformanceTable()
        else:
            return PerformanceTable.combine(self._performanceTables)
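
A minimal usage sketch of this accessor. The `workflow` name stands in for an instance of the class that defines it (the class itself is not shown in these excerpts). Example #3 reads self.performanceTable without parentheses, which suggests the original declares it as a @property, though the decorator is not part of this excerpt; calling look() with no arguments is likewise an assumption.

    # Hypothetical usage sketch: inspect the merged timing data after a run.
    table = workflow.performanceTable    # workflow.performanceTable() if it is a plain method
    table.look()                         # look(stream=...) is how Example #3 prints it
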
Example #2
    def iterate(self, inputData, iteration=0, sort=False, parallel=False, numberOfMappers=1, numberOfReducers=1, frozenClass=True):
        """Run a pure-Python map-reduce iteration.

        @type inputData: list of Python objects
        @param inputData: The objects to use as a data stream.
        @type iteration: int
        @param iteration: The iteration number.
        @type sort: bool
        @param sort: If True, perform a sorting step between the mapper and the reducer.
        @type parallel: bool
        @param parallel: If True, run the independent mappers and independent reducers as distinct threads.
        @type numberOfMappers: int
        @param numberOfMappers: Requested number of mappers.  Input data will be divided evenly among them.
        @type numberOfReducers: int
        @param numberOfReducers: Requested number of reducers.
        @type frozenClass: bool
        @param frozenClass: If True, practice serializing and unserializing the class to ensure the independence of the mappers and the reducers.  If False, skip this performance-limiting step.
        @rtype: 2-tuple of list, dict
        @return: List of output records and dictionary of output key-value pairs.
        """

        overheadPerformanceTable = PerformanceTable()
        self._performanceTables.append(overheadPerformanceTable)
        overheadPerformanceTable.begin("MapReduce.iterate")

        if iteration < len(self.mapRedApps):
            mapRedApp = self.mapRedApps[iteration]
            mapRedAppSerialized = self.mapRedAppsSerialized[iteration]
        else:
            mapRedApp = self.mapRedApps[-1]
            mapRedAppSerialized = self.mapRedAppsSerialized[-1]

        gatherOutput = mapRedApp.gatherOutput
        imports = mapRedApp.imports
        if imports is None:
            imports = {}
        namespace = self._buildNamespace(imports)

        self.logger.info("Start iteration %d with %d input records and %d metadata keys.", iteration, len(inputData), len(self.metadata))
        
        self.done = False
        overheadPerformanceTable.begin("copy metadata")
        startMetadata = copy.deepcopy(self.metadata)
        overheadPerformanceTable.end("copy metadata")

        intermediateData = {}
        dataLock = threading.Lock()
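        # emit() below is injected into every mapper instance (as a bound method or a
        # class attribute override); it deep-copies each (key, record) pair into the
        # shared intermediate store, with a lock so parallel mappers do not interleave.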
        def emit(appself, key, record):
            with dataLock:
                newTuple = copy.deepcopy((key, record))

                if key in intermediateData:
                    self.logger.debug("    key \"%s\": %r", key, record)
                    intermediateData[key].append(newTuple)

                else:
                    self.logger.debug("New key \"%s\": %r", key, record)
                    intermediateData[key] = [newTuple]

        if parallel:
            mapperThreads = []

        recordsPerMapper = int(math.ceil(len(inputData) / float(numberOfMappers)))
        for number in xrange(numberOfMappers):
            subData = inputData[(number * recordsPerMapper):((number + 1) * recordsPerMapper)]

            performanceTable = PerformanceTable()
            self._performanceTables.append(performanceTable)

            overrideAttributes = {"metadata": startMetadata, "iteration": iteration, "emit": emit, "performanceTable": performanceTable, "logger": logging.getLogger(mapRedApp.loggerName)}

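            # frozenClass=True exercises the serialize/unserialize round trip that a
            # distributed deployment would need; frozenClass=False patches the live
            # application instance directly and skips that overhead.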
            if frozenClass:
                overheadPerformanceTable.begin("unfreeze mapper")
                appClass = unserializeClass(mapRedAppSerialized, MapReduceApplication, overrideAttributes, namespace)
                overheadPerformanceTable.end("unfreeze mapper")
                controller = self.Controller(appClass())
            else:
                appInstance = mapRedApp()
                overrideAttributes["emit"] = types.MethodType(overrideAttributes["emit"], appInstance)
                appInstance.__dict__.update(overrideAttributes)
                controller = self.Controller(appInstance)

            if parallel:
                self.logger.info("Starting mapper %d in parallel with %d input records.", number, len(subData))
                mapperThreads.append(threading.Thread(target=controller.mapper, name=("Mapper_%03d" % number), args=(subData,)))
            else:
                self.logger.info("Starting mapper %d in series with %d input records.", number, len(subData))
                overheadPerformanceTable.pause("MapReduce.iterate")
                controller.mapper(subData)
                overheadPerformanceTable.unpause("MapReduce.iterate")

        if parallel:
            overheadPerformanceTable.pause("MapReduce.iterate")
            for thread in mapperThreads:
                thread.start()
            for thread in mapperThreads:
                thread.join()
            overheadPerformanceTable.unpause("MapReduce.iterate")

        self.logger.info("All mappers finished.")

        if sort:
            self.logger.info("Sorting %d intermediate values.", sum(len(x) for x in intermediateData.values()))
            overheadPerformanceTable.begin("sort intermediate data")
            for value in intermediateData.values():
                value.sort()
            overheadPerformanceTable.end("sort intermediate data")
        else:
            self.logger.info("Leaving %d intermediate values in the order in which they were generated.", sum(len(x) for x in intermediateData.values()))

        overheadPerformanceTable.begin("load balance")

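        # Greedy load balancing: sort keys by descending record count and give each one
        # to the reducer with the least work so far (longest-processing-time heuristic).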
        lengths = [(key, len(intermediateData[key])) for key in intermediateData]
        lengths.sort(lambda a, b: cmp(b[1], a[1]))

        assignments = [[] for x in xrange(numberOfReducers)]
        workload = [0] * numberOfReducers
        for key, length in lengths:
            index = min((w, i) for i, w in enumerate(workload))[1]   # this is argmin(workload)
            assignments[index].append(key)
            workload[index] += length
        
        if self.logger.isEnabledFor(logging.INFO):
            for i, (a, w) in enumerate(zip(assignments, workload)):
                if len(a) > 10:
                    self.logger.info("Assigning %d keys (%d total records) to reducer %d.", len(a), w, i)
                else:
                    self.logger.info("Assigning keys %s (%d total records) to reducer %d.", ", ".join("\"%s\"" % k for k in a), w, i)

        overheadPerformanceTable.end("load balance")

        outputRecords = []
        outputKeyValues = {}
        dataLock = threading.Lock()
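        # This second emit() replaces the mapper-side version for the reducers: key=None
        # appends to the flat output list, while a real key is written once to the shared
        # output dictionary (a duplicate key across reducers raises a RuntimeError).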
        def emit(appself, key, record):
            if key is None:
                self.logger.debug("OutputRecord: %r", record)
                outputRecords.append(record)
            else:
                with dataLock:
                    if key in outputKeyValues:
                        raise RuntimeError("Two reducers are trying to write to the same metadata key: \"%s\"" % key)
                    else:
                        self.logger.debug("OutputKeyValue \"%s\": %r", key, record)
                        outputKeyValues[key] = record

        if parallel:
            reducerThreads = []

        for number in xrange(numberOfReducers):
            subData = []
            for key in assignments[number]:
                subData.extend(intermediateData[key])

            performanceTable = PerformanceTable()
            self._performanceTables.append(performanceTable)

            overrideAttributes = {"metadata": startMetadata, "iteration": iteration, "emit": emit, "performanceTable": performanceTable, "logger": logging.getLogger(mapRedApp.loggerName)}
            if frozenClass:
                overheadPerformanceTable.begin("unfreeze reducer")
                appClass = unserializeClass(mapRedAppSerialized, MapReduceApplication, overrideAttributes, namespace)
                overheadPerformanceTable.end("unfreeze reducer")
                controller = self.Controller(appClass())
            else:
                appInstance = mapRedApp()
                overrideAttributes["emit"] = types.MethodType(overrideAttributes["emit"], appInstance)
                appInstance.__dict__.update(overrideAttributes)
                controller = self.Controller(appInstance)            
            
            if parallel:
                self.logger.info("Starting reducer %d in parallel with %d input records.", number, len(subData))
                reducerThreads.append(threading.Thread(target=controller.reducer, name=("Reducer_%03d" % number), args=(subData,)))
            else:
                self.logger.info("Starting reducer %d in series with %d input records.", number, len(subData))
                overheadPerformanceTable.pause("MapReduce.iterate")
                controller.reducer(subData)
                overheadPerformanceTable.unpause("MapReduce.iterate")

        if parallel:
            overheadPerformanceTable.pause("MapReduce.iterate")
            for thread in reducerThreads:
                thread.start()
            for thread in reducerThreads:
                thread.join()
            overheadPerformanceTable.unpause("MapReduce.iterate")

        self.logger.info("All reducers finished.")
        self.logger.info("Finished iteration %s with %d output records and %d metadata keys.", iteration, len(outputRecords), len(outputKeyValues))

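        # gatherOutput: run the application's endIteration() hook on the collected results.
        # A True return value marks the whole run as done, and the hook instance's
        # metadata becomes the new metadata for subsequent iterations.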
        if gatherOutput:
            performanceTable = PerformanceTable()
            self._performanceTables.append(performanceTable)

            overrideAttributes = {"metadata": startMetadata, "iteration": iteration, "emit": None, "performanceTable": performanceTable, "logger": logging.getLogger(mapRedApp.loggerName)}

            if frozenClass:
                overheadPerformanceTable.begin("unfreeze endIteration")
                appClass = unserializeClass(mapRedAppSerialized, MapReduceApplication, overrideAttributes, namespace)
                overheadPerformanceTable.end("unfreeze endIteration")
                appInstance = appClass()
            else:
                appInstance = mapRedApp()
                appInstance.__dict__.update(overrideAttributes)

            overheadPerformanceTable.pause("MapReduce.iterate")
            if appInstance.endIteration(outputRecords, outputKeyValues):
                self.done = True
            overheadPerformanceTable.unpause("MapReduce.iterate")

            self.metadata = appInstance.metadata

            overheadPerformanceTable.end("MapReduce.iterate")
            return outputRecords, outputKeyValues

        else:
            self.metadata = startMetadata
            overheadPerformanceTable.end("MapReduce.iterate")
            return [], {}
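
To make the interface concrete, the sketch below shows a hypothetical word-count application that iterate() could drive. Only the attribute names read above (loggerName, gatherOutput, imports, metadata, emit, endIteration) come from this example; the exact contract of MapReduceApplication, the assumption that Controller dispatches to methods named mapper and reducer, and the WordCount class itself are inferred and may not match the real library.

    # Hypothetical application sketch; MapReduceApplication's import path is not shown
    # in these excerpts.
    class WordCount(MapReduceApplication):
        loggerName = "WordCount"    # iterate() builds each instance's logger from this name
        gatherOutput = True         # ask iterate() to call endIteration() with the results
        imports = None              # no extra modules needed in the unfrozen namespace

        def mapper(self, records):
            # Each input record is assumed to be a line of text; emit() is injected by iterate().
            for line in records:
                for word in line.split():
                    self.emit(word, 1)

        def reducer(self, records):
            # records is a list of (key, value) tuples for the keys assigned to this reducer.
            counts = {}
            for word, one in records:
                counts[word] = counts.get(word, 0) + one
            for word, total in counts.items():
                self.emit(word, total)   # keyed emits end up in outputKeyValues

        def endIteration(self, outputRecords, outputKeyValues):
            return True                  # one iteration is enough; marks the run as done
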
Example #3
    def run(self, inputData, iterationLimit=None, sort=False, parallel=False, numberOfMappers=1, numberOfReducers=1, frozenClass=True, performanceTable=None):
        """Run the whole pure-Python map-reduce workflow, which may involve multiple iterations of map-reduce.

        @type inputData: list of Python objects
        @param inputData: The objects to use as a data stream.
        @type iterationLimit: int
        @param iterationLimit: The highest iteration to allow, with 1 being the first iteration.
        @type sort: bool
        @param sort: If True, perform a sorting step between the mapper and the reducer.
        @type parallel: bool
        @param parallel: If True, run the independent mappers and independent reducers as distinct threads.
        @type numberOfMappers: int
        @param numberOfMappers: Requested number of mappers.  Input data will be divided evenly among them.
        @type numberOfReducers: int
        @param numberOfReducers: Requested number of reducers.
        @type frozenClass: bool
        @param frozenClass: If True, practice serializing and unserializing the class to ensure the independence of the mappers and the reducers.  If False, skip this performance-limiting step.
        @type performanceTable: PerformanceTable or None
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: 3-tuple of list, dict, int
        @return: List of output records, dictionary of output key-value pairs, and the final iteration number.
        """

        self.logger.info("Start run with %d input records and %d cache keys.", len(inputData), len(self.metadata))

        overheadPerformanceTable = PerformanceTable()

        if performanceTable is None:
            self._performanceTables = [overheadPerformanceTable]
        else:
            self._performanceTables = [performanceTable, overheadPerformanceTable]

        startTime = time.time()
        overheadPerformanceTable.begin("MapReduce.run")
        
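        # Loop until an application's endIteration() sets self.done or the iteration limit
        # is reached; self.chain controls whether each iteration's output records become
        # the next iteration's input data.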
        iteration = 0
        while True:
            if iteration < len(self.chain):
                chain = self.chain[iteration]
            else:
                chain = self.chain[-1]

            overheadPerformanceTable.pause("MapReduce.run")
            outputRecords, outputKeyValues = self.iterate(inputData, iteration, sort, parallel, numberOfMappers, numberOfReducers, frozenClass)
            overheadPerformanceTable.unpause("MapReduce.run")

            if self.done:
                break

            if chain:
                inputData = outputRecords
                self.logger.info("Chaining output to next iteration (%d intermediate records).", len(outputRecords))
            else:
                self.logger.info("Ignoring iteration output (%d ignored records).", len(outputRecords))

            iteration += 1
            if iterationLimit is not None and iteration >= iterationLimit:
                self.logger.info("Reached iteration limit of %d.", iterationLimit)
                self.done = True
                break

        overheadPerformanceTable.end("MapReduce.run")
        endTime = time.time()

        if self.logger.isEnabledFor(logging.INFO):
            stream = StringIO()
            stream.write("%sPerformanceTable (concurrent times are added together; results are in CPU-seconds):%s%s" % (os.linesep, os.linesep, os.linesep))

            self.performanceTable.look(stream=stream)
            stream.write("%sTotal wall time, including any concurrency: %g%s" % (os.linesep, endTime - startTime, os.linesep))
            self.logger.info(stream.getvalue())

        self.logger.info("Finished run with %d output records.", len(outputRecords))
        return outputRecords, outputKeyValues, iteration
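
A hypothetical driver for run(), reusing the WordCount sketch above. How the surrounding map-reduce object is constructed and how its mapRedApps, mapRedAppsSerialized, and chain attributes are populated is not shown in these excerpts, so `mapReduce` here is a stand-in.

    # Hypothetical driver; the construction of `mapReduce` is not shown in these excerpts.
    lines = ["the quick brown fox", "jumps over the lazy dog", "the end"]
    outputRecords, outputKeyValues, lastIteration = mapReduce.run(
        lines,
        iterationLimit=1,      # stop after a single map-reduce pass
        sort=True,             # sort intermediate values between mapper and reducer
        parallel=False,        # run mappers and reducers serially in this thread
        numberOfMappers=2,
        numberOfReducers=2,
        frozenClass=False)     # skip the serialize/unserialize round trip for speed
    print(outputKeyValues)     # e.g. {"the": 3, "quick": 1, ...} for the WordCount sketch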