def mkNullResult():
    """
    Builds the result reported when no bootstrap iteration succeeded.

    Relies on ``self`` being available from the enclosing scope.

    Returns
    -------
    BootstrapResult
        Zero iterations, an empty parameter dictionary, and a statistic
        constructed from the selected columns of the observed data.
    """
    selectedObservedTS = self.fitter.observedTS[self.fitter.selectedColumns]
    emptyStatistic = TimeseriesStatistic(selectedObservedTS)
    return BootstrapResult(self.fitter, 0, {}, emptyStatistic)
def run(self):
    """
    Runs the bootstrap.

    Makes up to ITERATION_MULTIPLIER attempts. Each attempt synthesizes a
    new observed timeseries, installs it in the fitter, and refits. The
    first attempt whose fit has a minimizer result, a reduced chi-square
    no larger than MAX_CHISQ_MULT * self.baseChisq, and non-None
    parameters is accepted and ends the loop.

    Returns
    -------
    BootstrapResult
        Result for the accepted fit, or a null result (0 iterations, no
        parameters) if no attempt was accepted.
        None is returned if the runner is already done.
    """
    def mkNullResult():
        # Result used when no attempt produced an acceptable fit
        fittedStatistic = TimeseriesStatistic(
            self.fitter.observedTS[self.fitter.selectedColumns])
        return BootstrapResult(self.fitter, 0, {}, fittedStatistic)
    #
    if self.isDone:
        return None
    # Set up logging for this run
    if self.fd is not None:
        sys.stderr = self.fd
        sys.stdout = self.fd
    isSuccess = False
    bootstrapError = 0
    self.report()
    # The loop variable is named because the failure message reports it.
    for iteration in range(ITERATION_MULTIPLIER):
        newObservedTS = self.synthesizer.calculate()
        self.report("newObservedTS")
        # Update fitter to use the new observed data
        _ = self.fitter._updateObservedTS(newObservedTS, isCheck=False)
        self.report("updated fitter")
        # Try fitting
        try:
            self.fitter.fitModel(params=self.fitter.params)
            self.report("fitter.fit")
        except Exception as err:
            # Problem with the fit. Count it and try again.
            msg = "modelFitterBootstrap. Fit failed on iteration %d." \
                % iteration
            self.logger.error(msg, err)
            bootstrapError += 1
            continue
        # Verify that there is a result
        if self.fitter.minimizerResult is None:
            continue
        # Check if the fit is of sufficient quality
        if self.fitter.minimizerResult.redchi \
                > MAX_CHISQ_MULT * self.baseChisq:
            continue
        if self.fitter.params is None:
            continue
        isSuccess = True
        self.report("break")
        break
    # Create the result
    if isSuccess:
        self.numSuccessIteration += 1
        parameterDct = {
            k: [v] for k, v in self.fitter.params.valuesdict().items()
        }
        fittedStatistic = self.baseFittedStatistic.copy()
        fittedStatistic.accumulate(
            self.fitter.fittedTS.subsetColumns(self.fitter.selectedColumns,
                                               isCopy=False))
        bootstrapResult = BootstrapResult(self.fitter,
                                          self.numSuccessIteration,
                                          parameterDct,
                                          fittedStatistic,
                                          bootstrapError=bootstrapError)
    else:
        bootstrapResult = mkNullResult()
        # No acceptable fit in any attempt: stop this runner
        self._isDone = True
    # Close the logging file
    if self.fd is not None:
        if not self.fd.closed:
            self.fd.close()
    # See if completed work
    if self.numSuccessIteration >= self.numIteration:
        self._isDone = True
    return bootstrapResult
def bootstrap(
        self,
        # The following must be kept in sync with ModelFitterCore.__init__
        numIteration: int = 10,
        reportInterval: int = 1000,
        synthesizerClass=ObservationSynthesizerRandomizedResiduals,
        maxProcess: int = None,
        serializePath: str = None,
        **kwargs: dict):
    """
    Constructs a bootstrap estimate of parameter values.

    The work is split over worker processes that each run _runBootstrap
    and put a BootstrapResult on a shared queue. The results are merged
    into self.bootstrapResult.

    Parameters
    ----------
    numIteration: number of bootstrap iterations
    reportInterval: number of iterations between progress reports
    synthesizerClass: object that synthesizes new observations
        Must subclass ObservationSynthesizer
    maxProcess: Maximum number of processes to use. Default: numCPU
    serializePath: Where to serialize the fitter after bootstrap
    kwargs: arguments passed to ObservationSynthesizer

    Example
    -------
        f.bootstrap()
        f.getParameterMeans()  # Mean values
        f.getParameterStds()  # standard deviations

    Notes
    -----
    1. Arguments can be overridden by the constructor using
       the keyword argument bootstrapKwargs.
    """
    def get(name, value):
        # A non-None entry in bootstrapKwargs overrides the call argument
        if name in self.bootstrapKwargs:
            if self.bootstrapKwargs[name] is not None:
                return self.bootstrapKwargs[name]
        return value
    # Handle overrides of arguments specified in constructor
    numIteration = get("numIteration", numIteration)
    reportInterval = get("reportInterval", reportInterval)
    synthesizerClass = get("synthesizerClass", synthesizerClass)
    maxProcess = get("maxProcess", maxProcess)
    serializePath = get("serializePath", serializePath)
    # Other initializations
    if maxProcess is None:
        maxProcess = multiprocessing.cpu_count()
    if self.minimizerResult is None:
        self.fitModel()
    # Run processes
    numProcess = max(int(numIteration / ITERATION_PER_PROCESS), 1)
    numProcess = min(numProcess, maxProcess)
    numProcessIteration = int(np.ceil(numIteration / numProcess))
    args_list = [
        _Arguments(self,
                   numProcess,
                   i,
                   numIteration=numProcessIteration,
                   reportInterval=reportInterval,
                   synthesizerClass=synthesizerClass,
                   _loggerPrefix="bootstrap",
                   **kwargs) for i in range(numProcess)
    ]
    msg = "Running bootstrap for %d successful iterations " % numIteration
    msg += "with %d processes." % numProcess
    self.logger.activity(msg)
    # Run separate processes for each bootstrap
    processes = []
    queue = multiprocessing.Queue()
    results = []
    # Set to False for debug so not doing multiple processes
    isMultiprocess = True
    if isMultiprocess:
        for args in args_list:
            p = multiprocessing.Process(target=_runBootstrap,
                                        args=(
                                            args,
                                            queue,
                                        ))
            p.start()
            processes.append(p)
        timeout = MAX_ITERATION_TIME * numProcessIteration
        try:
            # Collect one result per process
            for _ in processes:
                results.append(queue.get(timeout=timeout))
        except Exception as err:
            msg = "modelFitterBootstrap/Error in process management"
            self.logger.error(msg, err)
        finally:
            # Get rid of possible zombies. Done in finally so workers are
            # also stopped when collecting results times out or fails.
            for process in processes:
                process.terminate()
    else:
        # Keep to debug _runBootstrap single threaded
        results = []
        for args in args_list:
            results.append(_runBootstrap(args))
    if len(results) == 0:
        # Build the full message; each part is appended, not overwritten
        msg = "modelFitterBootstrap/timeout in solving model."
        msg += "\nConsider increasing per timeout."
        msg += "\nCurent value: %f" % MAX_ITERATION_TIME
        self.logger.result(msg)
    else:
        self.bootstrapResult = BootstrapResult.merge(results)
        # Update the logger in place
        _ = _helpers.copyObject(self.bootstrapResult.fitter.logger,
                                self.logger)
        if self.bootstrapResult.fittedStatistic is not None:
            self.bootstrapResult.fittedStatistic.calculate()
        self.logger.result("%d bootstrap estimates of parameters." %
                           self.bootstrapResult.numSimulation)
        if serializePath is not None:
            self.serialize(serializePath)
def _runBootstrap(arguments: _Arguments, queue=None) -> BootstrapResult:
    """
    Executes bootstrapping.

    Performs an initial fit (up to MAX_TRIES attempts). If that succeeds,
    repeatedly synthesizes new observations, fits a fresh
    ModelFitterBootstrap to them, and records the parameters of fits whose
    reduced chi-square does not exceed MAX_CHISQ_MULT times the base value.
    Stops after arguments.numIteration successes or
    numIteration * ITERATION_MULTIPLIER attempts.

    Parameters
    ----------
    arguments: inputs to bootstrap
    queue: multiprocessing.Queue
        If provided, the result is put on the queue instead of returned.

    Returns
    -------
    BootstrapResult
        Only when queue is None; otherwise None.

    Notes
    -----
    1. Only the first process generates progress reports.
    2. Uses METHOD_LEASTSQ for fitModel iterations.
    """
    fitter = arguments.fitter
    logger = fitter.logger
    mainBlock = Logger.join(arguments._loggerPrefix, "_runBootstrap")
    mainGuid = logger.startBlock(mainBlock)
    # Unpack arguments
    isSuccess = False
    lastErr = ""
    # Do an initial fit
    for _ in range(MAX_TRIES):
        try:
            fitter.fitModel()  # Initialize model
            isSuccess = True
            break
        except Exception as err:
            lastErr = err
    # Set up logging for this process
    fd = logger.getFileDescriptor()
    processIdx = arguments.processIdx
    if fd is not None:
        sys.stderr = logger.getFileDescriptor()
        sys.stdout = logger.getFileDescriptor()
    iterationGuid = None
    if not isSuccess:
        msg = "Process %d/modelFitterBootstrip/_runBootstrap" % processIdx
        logger.error(msg, lastErr)
        fittedStatistic = TimeseriesStatistic(fitter.observedTS,
                                              percentiles=[])
        bootstrapResult = BootstrapResult(fitter, 0, {}, fittedStatistic)
    else:
        numIteration = arguments.numIteration
        reportInterval = arguments.reportInterval
        processingRate = min(arguments.numProcess,
                             multiprocessing.cpu_count())
        cols = fitter.selectedColumns
        synthesizer = arguments.synthesizerClass(
            observedTS=fitter.observedTS.subsetColumns(cols),
            fittedTS=fitter.fittedTS.subsetColumns(cols),
            **arguments.kwargs)
        # Initialize
        parameterDct = {p: [] for p in fitter.parametersToFit}
        numSuccessIteration = 0
        lastReport = 0
        if fitter.minimizerResult is None:
            fitter.fitModel()
        baseChisq = fitter.minimizerResult.redchi
        # Do the bootstrap iterations
        bootstrapError = 0
        iterationBlock = Logger.join(mainBlock, "Iteration")
        for iteration in range(numIteration * ITERATION_MULTIPLIER):
            if iterationGuid is not None:
                logger.endBlock(iterationGuid)
            iterationGuid = logger.startBlock(iterationBlock)
            newObservedTS = synthesizer.calculate()
            fittingSetupBlock = Logger.join(iterationBlock, "fittingSetup")
            fittingSetupGuid = logger.startBlock(fittingSetupBlock)
            newFitter = ModelFitterBootstrap(
                fitter.roadrunnerModel,
                newObservedTS,
                fitter.parametersToFit,
                selectedColumns=fitter.selectedColumns,
                # Use bootstrap methods for fitting
                fitterMethods=fitter._bootstrapMethods,
                parameterLowerBound=fitter.lowerBound,
                parameterUpperBound=fitter.upperBound,
                fittedDataTransformDct=fitter.fittedDataTransformDct,
                logger=logger,
                _loggerPrefix=iterationBlock,
                isPlot=fitter._isPlot)
            # NOTE(review): the statistic is rebuilt every iteration, so
            # the returned one holds at most the last accumulate() call.
            # Confirm this is intended before relying on it.
            fittedStatistic = TimeseriesStatistic(newFitter.observedTS,
                                                  percentiles=[])
            logger.endBlock(fittingSetupGuid)
            try:
                if (iteration > 0) and (iteration != lastReport) \
                        and (processIdx == 0):
                    totalSuccessIteration = \
                        numSuccessIteration * processingRate
                    totalIteration = iteration * processingRate
                    if totalIteration % reportInterval == 0:
                        msg = "Bootstrap completed %d total iterations "
                        msg += "with %d successes."
                        msg = msg % (totalIteration, totalSuccessIteration)
                        fitter.logger.status(msg)
                        # NOTE(review): compared against `iteration` above
                        # but stores the success count - verify intent.
                        lastReport = numSuccessIteration
                if numSuccessIteration >= numIteration:
                    # Performed the iterations
                    break
                tryBlock = Logger.join(iterationBlock, "try")
                tryGuid = logger.startBlock(tryBlock)
                try:
                    tryFitterBlock = Logger.join(tryBlock, "Fitter")
                    tryFitterGuid = logger.startBlock(tryFitterBlock)
                    newFitter.fitModel(params=fitter.params)
                    logger.endBlock(tryFitterGuid)
                except Exception as err:
                    # Problem with the fit. Don't numSuccessIteration it.
                    msg = "Process %d/modelFitterBootstrap" % processIdx
                    msg += " Fit failed on iteration %d." % iteration
                    fitter.logger.error(msg, err)
                    logger.endBlock(tryGuid)
                    continue
                if newFitter.minimizerResult.redchi \
                        > MAX_CHISQ_MULT * baseChisq:
                    if IS_REPORT:
                        msg = "Process %d: Fit has high chisq: " \
                              "%2.2f on iteration %d."
                        fitter.logger.exception(
                            msg % (processIdx,
                                   newFitter.minimizerResult.redchi,
                                   iteration))
                    logger.endBlock(tryGuid)
                    continue
                if newFitter.params is None:
                    # Close the "try" block here too, as the other
                    # early-continue paths do.
                    logger.endBlock(tryGuid)
                    continue
                numSuccessIteration += 1
                dct = newFitter.params.valuesdict()
                for p in fitter.parametersToFit:
                    parameterDct[p].append(dct[p])
                fittedStatistic.accumulate(newFitter.fittedTS)
                # Kept from the original: newFitter is replaced on the next
                # iteration, but the call still advances the synthesizer.
                newFitter.observedTS = synthesizer.calculate()
                logger.endBlock(tryGuid)
            except Exception as err:
                msg = "Process %d/modelFitterBootstrap" % processIdx
                msg += " Error on iteration %d." % iteration
                fitter.logger.error(msg, err)
                bootstrapError += 1
        fitter.logger.status("Process %d: completed bootstrap." %
                             (processIdx + 1))
        bootstrapResult = BootstrapResult(fitter,
                                          numSuccessIteration,
                                          parameterDct,
                                          fittedStatistic,
                                          bootstrapError=bootstrapError)
    if iterationGuid is not None:
        logger.endBlock(iterationGuid)
    logger.endBlock(mainGuid)
    if fd is not None:
        if not fd.closed:
            fd.close()
    if queue is None:
        return bootstrapResult
    else:
        queue.put(bootstrapResult)
def bootstrap(
        self,
        isParallel=True,
        # The following must be kept in sync with ModelFitterCore.__init__
        numIteration: int = None,
        maxProcess: int = None,
        serializePath: str = None,
        **kwargs: dict):
    """
    Constructs a bootstrap estimate of parameter values.

    The iterations are divided into one batch per process and run with
    ParallelRunner using BootstrapRunner. The batch results are merged
    into self.bootstrapResult.

    Parameters
    ----------
    isParallel: bool
        run in parallel
    numIteration: number of bootstrap iterations
    maxProcess: Maximum number of processes to use. Default: numCPU
    serializePath: Where to serialize the fitter after bootstrap
    kwargs: arguments passed to ObservationSynthesizer

    Example
    -------
        f.bootstrap()
        f.getParameterMeans()  # Mean values
        f.getParameterStds()  # standard deviations

    Notes
    -----
    1. Arguments can be overridden by the constructor using
       the keyword argument bootstrapKwargs.
    2. Batch sizes sum to numIteration: when numIteration is not a
       multiple of the number of processes, the first batches each run
       one extra iteration.
    """
    def getValue(name, value, defaultValue=None):
        # Precedence: explicit argument, bootstrapKwargs, instance
        # attribute of the same name, then defaultValue.
        if value is not None:
            return value
        # Handle arguments specified in constructor
        if name in self.bootstrapKwargs:
            if self.bootstrapKwargs[name] is not None:
                return self.bootstrapKwargs[name]
        if name in self.__dict__:
            return self.__dict__[name]
        # None specified
        return defaultValue
    #
    # Initialization
    numIteration = getValue("numIteration", numIteration)
    isParallel = getValue("_isParallel", isParallel)
    isProgressBar = getValue("_isProgressBar", None, defaultValue=True)
    if maxProcess is None:
        maxProcess = self._maxProcess
    if maxProcess is None:
        maxProcess = multiprocessing.cpu_count()
    serializePath = getValue("serializePath", serializePath)
    # Ensure that there is a fitted model
    if self.minimizerResult is None:
        self.fitModel()
    # Construct arguments collection
    numProcess = min(maxProcess, numIteration)
    # Spread the remainder over the first batches so that no requested
    # iteration is dropped by the integer division.
    batchSize, numExtra = divmod(numIteration, numProcess)
    argumentsCol = [
        RunnerArgument(self,
                       numIteration=batchSize + (1 if idx < numExtra else 0),
                       _loggerPrefix="bootstrap",
                       **kwargs) for idx in range(numProcess)
    ]
    # Run separate processes for each batch
    runner = ParallelRunner(BootstrapRunner,
                            desc="iteration",
                            maxProcess=numProcess)
    results = runner.runSync(argumentsCol,
                             isParallel=isParallel,
                             isProgressBar=isProgressBar)
    # Check the results
    if len(results) == 0:
        # Build the full message; each part is appended, not overwritten
        msg = "modelFitterBootstrap/timeout in solving model."
        msg += "\nConsider increasing per timeout."
        msg += "\nCurent value: %f" % MAX_ITERATION_TIME
        self.logger.result(msg)
    else:
        self.bootstrapResult = BootstrapResult.merge(results, self)
        # Update the logger in place
        _ = _helpers.copyObject(self.bootstrapResult.fitter.logger,
                                self.logger)
        self.bootstrapResult.fittedStatistic.calculate()
        self.logger.result("%d bootstrap estimates of parameters." %
                           self.bootstrapResult.numSimulation)
        if serializePath is not None:
            self.serialize(serializePath)