Ejemplo n.º 1
0
 def mkNullResult():
     """
     Builds an empty bootstrap result (0 simulations, no parameter samples).

     Relies on ``self`` being available from the enclosing scope.

     Returns
     -------
     BootstrapResult
     """
     observedTS = self.fitter.observedTS[self.fitter.selectedColumns]
     # The statistic is built from the selected columns of the observed data
     return BootstrapResult(self.fitter, 0, {},
                            TimeseriesStatistic(observedTS))
Ejemplo n.º 2
0
    def run(self):
        """
        Runs one bootstrap step.

        Synthesizes new observations and refits the model, retrying up to
        ITERATION_MULTIPLIER times until a fit is obtained whose reduced
        chi-square does not exceed MAX_CHISQ_MULT * self.baseChisq.

        Returns
        -------
        BootstrapResult
            On success, holds one value per fitted parameter and the
            accumulated fitted statistic. If no attempt succeeds, an empty
            result (0 simulations) is returned and the runner is marked done.
            None is returned if the runner was already done when called.
        """
        def mkNullResult():
            # Empty result used when no attempt produced an acceptable fit
            fittedStatistic = TimeseriesStatistic(
                self.fitter.observedTS[self.fitter.selectedColumns])
            return BootstrapResult(self.fitter, 0, {}, fittedStatistic)

        #
        if self.isDone:
            return
        # Set up logging for this run
        # NOTE(review): sys.stdout/sys.stderr are not restored and the file
        # is closed at the end of this method -- confirm this only runs in a
        # dedicated worker process.
        if self.fd is not None:
            sys.stderr = self.fd
            sys.stdout = self.fd
        isSuccess = False
        bootstrapError = 0
        self.report()
        # The loop index is named so that the error message below can use it
        for iteration in range(ITERATION_MULTIPLIER):
            newObservedTS = self.synthesizer.calculate()
            self.report("newObservedTS")
            # Update fitter to use the new observed data
            _ = self.fitter._updateObservedTS(newObservedTS, isCheck=False)
            self.report("updated fitter")
            # Try fitting
            try:
                self.fitter.fitModel(params=self.fitter.params)
                self.report("fitter.fit")
            except Exception as err:
                # Problem with the fit. Log it and try again with new data.
                msg = "modelFitterBootstrap. Fit failed on iteration %d."  \
                      % iteration
                self.logger.error(msg, err)
                bootstrapError += 1
                continue
            # Verify that there is a result
            if self.fitter.minimizerResult is None:
                continue
            # Check if the fit is of sufficient quality
            if self.fitter.minimizerResult.redchi > MAX_CHISQ_MULT * self.baseChisq:
                continue
            if self.fitter.params is None:
                continue
            isSuccess = True
            self.report("break")
            break
        # Create the result
        if isSuccess:
            self.numSuccessIteration += 1
            # Each parameter value is wrapped in a single-element list
            parameterDct = {
                k: [v]
                for k, v in self.fitter.params.valuesdict().items()
            }
            fittedStatistic = self.baseFittedStatistic.copy()
            fittedStatistic.accumulate(
                self.fitter.fittedTS.subsetColumns(self.fitter.selectedColumns,
                                                   isCopy=False))
            bootstrapResult = BootstrapResult(self.fitter,
                                              self.numSuccessIteration,
                                              parameterDct,
                                              fittedStatistic,
                                              bootstrapError=bootstrapError)
        else:
            # No acceptable fit: stop further work
            bootstrapResult = mkNullResult()
            self._isDone = True
        # Close the logging file
        if self.fd is not None:
            if not self.fd.closed:
                self.fd.close()
        # See if completed work
        if self.numSuccessIteration >= self.numIteration:
            self._isDone = True
        return bootstrapResult
Ejemplo n.º 3
0
    def bootstrap(
            self,
            # The following must be kept in sync with ModelFitterCore.__init__
            numIteration: int = 10,
            reportInterval: int = 1000,
            synthesizerClass=ObservationSynthesizerRandomizedResiduals,
            maxProcess: int = None,
            serializePath: str = None,
            **kwargs: dict):
        """
        Constructs a bootstrap estimate of parameter values.

        The work is split across worker processes that each run
        _runBootstrap; their results are merged into self.bootstrapResult.

        Parameters
        ----------
        numIteration: number of bootstrap iterations
        reportInterval: number of iterations between progress reports
        synthesizerClass: object that synthesizes new observations
            Must subclass ObservationSynthesizer
        maxProcess: Maximum number of processes to use. Default: numCPU
        serializePath: Where to serialize the fitter after bootstrap
        kwargs: extra keyword arguments forwarded to _Arguments

        Example
        -------
            f.bootstrap()
            f.getParameterMeans()  # Mean values
            f.getParameterStds()  # standard deviations

        Notes
        -----
            1. Arguments can be overriden by the constructor using
               the keyword argument bootstrapKwargs. A non-None entry in
               bootstrapKwargs takes precedence over the value passed here.
        """
        def get(name, value):
            # A non-None constructor override wins over the call argument
            if name in self.bootstrapKwargs:
                if self.bootstrapKwargs[name] is not None:
                    return self.bootstrapKwargs[name]
            return value

        # Handle overrides of arguments specified in constructor
        numIteration = get("numIteration", numIteration)
        reportInterval = get("reportInterval", reportInterval)
        synthesizerClass = get("synthesizerClass", synthesizerClass)
        maxProcess = get("maxProcess", maxProcess)
        serializePath = get("serializePath", serializePath)
        # Other initializations
        if maxProcess is None:
            maxProcess = multiprocessing.cpu_count()
        if self.minimizerResult is None:
            self.fitModel()
        # Run processes
        numProcess = max(int(numIteration / ITERATION_PER_PROCESS), 1)
        numProcess = min(numProcess, maxProcess)
        # Round up so that the processes together cover numIteration
        numProcessIteration = int(np.ceil(numIteration / numProcess))
        args_list = [
            _Arguments(self,
                       numProcess,
                       i,
                       numIteration=numProcessIteration,
                       reportInterval=reportInterval,
                       synthesizerClass=synthesizerClass,
                       _loggerPrefix="bootstrap",
                       **kwargs) for i in range(numProcess)
        ]
        msg = "Running bootstrap for %d successful iterations " % numIteration
        msg += "with %d processes." % numProcess
        self.logger.activity(msg)
        # Run separate processes for each bootstrap
        processes = []
        queue = multiprocessing.Queue()
        results = []
        # Set to False for debug so not doing multiple processes
        if True:
            for args in args_list:
                p = multiprocessing.Process(target=_runBootstrap,
                                            args=(
                                                args,
                                                queue,
                                            ))
                p.start()
                processes.append(p)
            timeout = MAX_ITERATION_TIME * numProcessIteration
            try:
                # Expect exactly one result per started process
                for _ in processes:
                    results.append(queue.get(timeout=timeout))
            except Exception as err:
                msg = "modelFitterBootstrap/Error in process management"
                self.logger.error(msg, err)
            finally:
                # Get rid of possible zombies. Done here so that workers are
                # also stopped when waiting for a result times out.
                for process in processes:
                    process.terminate()
        else:
            # Keep to debug _runBootstrap single threaded
            results = []
            for args in args_list:
                results.append(_runBootstrap(args))
        if len(results) == 0:
            # Build the full message; each part is appended, not replaced
            msg = "modelFitterBootstrap/timeout in solving model."
            msg += "\nConsider increasing per timeout."
            msg += "\nCurent value: %f" % MAX_ITERATION_TIME
            self.logger.result(msg)
        else:
            self.bootstrapResult = BootstrapResult.merge(results)
            # Update the logger in place
            _ = _helpers.copyObject(self.bootstrapResult.fitter.logger,
                                    self.logger)
            if self.bootstrapResult.fittedStatistic is not None:
                self.bootstrapResult.fittedStatistic.calculate()
            self.logger.result("%d bootstrap estimates of parameters." %
                               self.bootstrapResult.numSimulation)
            if serializePath is not None:
                self.serialize(serializePath)
Ejemplo n.º 4
0
def _runBootstrap(arguments: _Arguments, queue=None) -> BootstrapResult:
    """
    Executes bootstrapping.

    Parameters
    ----------
    arguments: inputs to bootstrap
    queue: multiprocessing.Queue
        If provided, the result is put on the queue instead of returned.

    Returns
    -------
    BootstrapResult
        Returned only when queue is None; otherwise the function returns
        None after putting the result on the queue.

    Notes
    -----
    1. Only the first process generates progress reports.
    2. Each iteration fits with a new ModelFitterBootstrap that uses
       fitter._bootstrapMethods as its fitter methods.
    3. If the initial fit fails MAX_TRIES times, an empty result
       (0 simulations) is produced.
    """
    fitter = arguments.fitter
    logger = fitter.logger
    mainBlock = Logger.join(arguments._loggerPrefix, "_runBootstrap")
    mainGuid = logger.startBlock(mainBlock)
    # Unpack arguments
    isSuccess = False
    lastErr = ""
    # Do an initial fit
    for _ in range(MAX_TRIES):
        try:
            fitter.fitModel()  # Initialize model
            isSuccess = True
            break
        except Exception as err:
            lastErr = err
    # Set up logging for this process
    fd = logger.getFileDescriptor()
    processIdx = arguments.processIdx
    if fd is not None:
        sys.stderr = logger.getFileDescriptor()
        sys.stdout = logger.getFileDescriptor()
    iterationGuid = None
    if not isSuccess:
        msg = "Process %d/modelFitterBootstrip/_runBootstrap" % processIdx
        logger.error(msg, lastErr)
        fittedStatistic = TimeseriesStatistic(fitter.observedTS,
                                              percentiles=[])
        bootstrapResult = BootstrapResult(fitter, 0, {}, fittedStatistic)
    else:
        numIteration = arguments.numIteration
        reportInterval = arguments.reportInterval
        # Used to scale this process's counts in the progress reports
        processingRate = min(arguments.numProcess, multiprocessing.cpu_count())
        cols = fitter.selectedColumns
        synthesizer = arguments.synthesizerClass(
            observedTS=fitter.observedTS.subsetColumns(cols),
            fittedTS=fitter.fittedTS.subsetColumns(cols),
            **arguments.kwargs)
        # Initialize
        parameterDct = {p: [] for p in fitter.parametersToFit}
        numSuccessIteration = 0
        lastReport = 0
        if fitter.minimizerResult is None:
            fitter.fitModel()
        baseChisq = fitter.minimizerResult.redchi
        # Do the bootstrap iterations
        bootstrapError = 0
        iterationBlock = Logger.join(mainBlock, "Iteration")
        for iteration in range(numIteration * ITERATION_MULTIPLIER):
            if iterationGuid is not None:
                logger.endBlock(iterationGuid)
            iterationGuid = logger.startBlock(iterationBlock)
            newObservedTS = synthesizer.calculate()
            fittingSetupBlock = Logger.join(iterationBlock, "fittingSetup")
            fittingSetupGuid = logger.startBlock(fittingSetupBlock)
            newFitter = ModelFitterBootstrap(
                fitter.roadrunnerModel,
                newObservedTS,
                fitter.parametersToFit,
                selectedColumns=fitter.selectedColumns,
                # Use bootstrap methods for fitting
                fitterMethods=fitter._bootstrapMethods,
                parameterLowerBound=fitter.lowerBound,
                parameterUpperBound=fitter.upperBound,
                fittedDataTransformDct=fitter.fittedDataTransformDct,
                logger=logger,
                _loggerPrefix=iterationBlock,
                isPlot=fitter._isPlot)
            # NOTE(review): the statistic is re-created on every iteration,
            # so only the last iteration's accumulate() call (if any) is
            # reflected in the returned result -- confirm this is intended.
            fittedStatistic = TimeseriesStatistic(newFitter.observedTS,
                                                  percentiles=[])
            logger.endBlock(fittingSetupGuid)
            try:
                if (iteration > 0) and (iteration != lastReport)  \
                        and (processIdx == 0):
                    totalSuccessIteration = numSuccessIteration * processingRate
                    totalIteration = iteration * processingRate
                    if totalIteration % reportInterval == 0:
                        msg = "Bootstrap completed %d total iterations "
                        msg += "with %d successes."
                        msg = msg % (totalIteration, totalSuccessIteration)
                        fitter.logger.status(msg)
                        # NOTE(review): lastReport is compared against
                        # iteration above but set to the success count here.
                        lastReport = numSuccessIteration
                if numSuccessIteration >= numIteration:
                    # Performed the iterations
                    break
                tryBlock = Logger.join(iterationBlock, "try")
                tryGuid = logger.startBlock(tryBlock)
                try:
                    tryFitterBlock = Logger.join(tryBlock, "Fitter")
                    tryFitterGuid = logger.startBlock(tryFitterBlock)
                    newFitter.fitModel(params=fitter.params)
                    logger.endBlock(tryFitterGuid)
                except Exception as err:
                    # Problem with the fit. Don't count it as a success.
                    msg = "Process %d/modelFitterBootstrap" % processIdx
                    msg += " Fit failed on iteration %d." % iteration
                    fitter.logger.error(msg, err)
                    logger.endBlock(tryGuid)
                    continue
                if newFitter.minimizerResult.redchi > MAX_CHISQ_MULT * baseChisq:
                    if IS_REPORT:
                        msg = "Process %d: Fit has high chisq: %2.2f on iteration %d."
                        fitter.logger.exception(
                            msg %
                            (processIdx, newFitter.minimizerResult.redchi,
                             iteration))
                    logger.endBlock(tryGuid)
                    continue
                if newFitter.params is None:
                    # Close the logging block, as the other skip paths do
                    logger.endBlock(tryGuid)
                    continue
                numSuccessIteration += 1
                # Record one sample for each fitted parameter
                dct = newFitter.params.valuesdict()
                for p in fitter.parametersToFit:
                    parameterDct[p].append(dct[p])
                fittedStatistic.accumulate(newFitter.fittedTS)
                newFitter.observedTS = synthesizer.calculate()
                logger.endBlock(tryGuid)
            except Exception as err:
                msg = "Process %d/modelFitterBootstrap" % processIdx
                msg += " Error on iteration %d." % iteration
                fitter.logger.error(msg, err)
                bootstrapError += 1
        fitter.logger.status("Process %d: completed bootstrap." %
                             (processIdx + 1))
        bootstrapResult = BootstrapResult(fitter,
                                          numSuccessIteration,
                                          parameterDct,
                                          fittedStatistic,
                                          bootstrapError=bootstrapError)
    if iterationGuid is not None:
        logger.endBlock(iterationGuid)
    logger.endBlock(mainGuid)
    if fd is not None:
        if not fd.closed:
            fd.close()
    if queue is None:
        return bootstrapResult
    else:
        queue.put(bootstrapResult)
Ejemplo n.º 5
0
    def bootstrap(
            self,
            isParallel=True,
            # The following must be kept in sync with ModelFitterCore.__init__
            numIteration: int = None,
            maxProcess: int = None,
            serializePath: str = None,
            **kwargs: dict):
        """
        Constructs a bootstrap estimate of parameter values.

        The iterations are split into equally sized batches that are run by
        a ParallelRunner; the batch results are merged into
        self.bootstrapResult.

        Parameters
        ----------
        isParallel: bool
            run in parallel
        numIteration: number of bootstrap iterations
        maxProcess: Maximum number of processes to use. Default: numCPU
        serializePath: Where to serialize the fitter after bootstrap
        kwargs: extra keyword arguments forwarded to RunnerArgument

        Example
        -------
            f.bootstrap()
            f.getParameterMeans()  # Mean values
            f.getParameterStds()  # standard deviations

        Notes
        -----
            1. An argument left as None falls back to the constructor's
               bootstrapKwargs, then to an instance attribute of the
               same name.
        """
        def getValue(name, value, defaultValue=None):
            # Precedence: explicit argument, constructor bootstrapKwargs,
            # instance attribute of the same name, then defaultValue.
            if value is not None:
                return value
            # Handle arguments specified in constructor
            if name in self.bootstrapKwargs:
                if self.bootstrapKwargs[name] is not None:
                    return self.bootstrapKwargs[name]
            if name in self.__dict__:
                return self.__dict__[name]
            # None specified
            return defaultValue

        #
        # Initialization
        numIteration = getValue("numIteration", numIteration)
        isParallel = getValue("_isParallel", isParallel)
        isProgressBar = getValue("_isProgressBar", None, defaultValue=True)
        if maxProcess is None:
            maxProcess = self._maxProcess
        if maxProcess is None:
            maxProcess = multiprocessing.cpu_count()
        serializePath = getValue("serializePath", serializePath)
        # Ensure that there is a fitted model
        if self.minimizerResult is None:
            self.fitModel()
        # Construct arguments collection
        # NOTE(review): numIteration must resolve to a positive int here;
        # None or 0 makes the next two lines raise.
        numProcess = min(maxProcess, numIteration)
        # NOTE(review): integer division drops the remainder, so fewer than
        # numIteration iterations are requested when it is not a multiple
        # of numProcess -- confirm this is acceptable.
        batchSize = numIteration // numProcess
        argumentsCol = [
            RunnerArgument(self,
                           numIteration=batchSize,
                           _loggerPrefix="bootstrap",
                           **kwargs) for _ in range(numProcess)
        ]
        # Run separate processes for each batch
        runner = ParallelRunner(BootstrapRunner,
                                desc="iteration",
                                maxProcess=numProcess)
        results = runner.runSync(argumentsCol,
                                 isParallel=isParallel,
                                 isProgressBar=isProgressBar)
        # Check the results
        if len(results) == 0:
            # Build the full message; each part is appended, not replaced
            msg = "modelFitterBootstrap/timeout in solving model."
            msg += "\nConsider increasing per timeout."
            msg += "\nCurent value: %f" % MAX_ITERATION_TIME
            self.logger.result(msg)
        else:
            self.bootstrapResult = BootstrapResult.merge(results, self)
            # Update the logger in place
            _ = _helpers.copyObject(self.bootstrapResult.fitter.logger,
                                    self.logger)
            self.bootstrapResult.fittedStatistic.calculate()
            self.logger.result("%d bootstrap estimates of parameters." %
                               self.bootstrapResult.numSimulation)
            if serializePath is not None:
                self.serialize(serializePath)