def bootstrap(self,
        # The following must be kept in sync with ModelFitterCore.__init__
        numIteration: int = 10,
        reportInterval: int = 1000,
        synthesizerClass=ObservationSynthesizerRandomizedResiduals,
        maxProcess: int = None,
        serializePath: str = None,
        **kwargs: dict):
    """
    Constructs a bootstrap estimate of parameter values.

    Parameters
    ----------
    numIteration: number of bootstrap iterations
    reportInterval: number of iterations between progress reports
    synthesizerClass: object that synthesizes new observations
        Must subclass ObservationSynthesizer
    maxProcess: Maximum number of processes to use. Default: numCPU
    serializePath: Where to serialize the fitter after bootstrap
    kwargs: arguments passed to ObservationSynthesizer

    Example
    -------
        f.bootstrap()
        f.getParameterMeans()  # Mean values
        f.getParameterStds()  # standard deviations

    Notes
    -----
    1. Arguments can be overriden by the constructor using
       the keyword argument bootstrapKwargs.
    """
    def get(name, value):
        # A non-None entry in bootstrapKwargs (set in the constructor)
        # overrides the argument passed to this call.
        override = self.bootstrapKwargs.get(name)
        return value if override is None else override
    # Handle overrides of arguments specified in constructor
    numIteration = get("numIteration", numIteration)
    reportInterval = get("reportInterval", reportInterval)
    synthesizerClass = get("synthesizerClass", synthesizerClass)
    maxProcess = get("maxProcess", maxProcess)
    serializePath = get("serializePath", serializePath)
    # Other initializations
    if maxProcess is None:
        maxProcess = multiprocessing.cpu_count()
    # Ensure there is a fitted model to resample residuals from
    if self.minimizerResult is None:
        self.fitModel()
    # Partition the requested iterations across worker processes.
    # ceil() guarantees at least numIteration iterations in total.
    numProcess = max(int(numIteration / ITERATION_PER_PROCESS), 1)
    numProcess = min(numProcess, maxProcess)
    numProcessIteration = int(np.ceil(numIteration / numProcess))
    args_list = [_Arguments(self, numProcess, i,
                            numIteration=numProcessIteration,
                            reportInterval=reportInterval,
                            synthesizerClass=synthesizerClass,
                            _loggerPrefix="bootstrap",
                            **kwargs)
                 for i in range(numProcess)]
    msg = "Running bootstrap for %d successful iterations " % numIteration
    msg += "with %d processes." % numProcess
    self.logger.activity(msg)
    # Run separate processes for each bootstrap
    processes = []
    queue = multiprocessing.Queue()
    results = []
    # Set to False for debug so not doing multiple processes
    if True:
        for args in args_list:
            p = multiprocessing.Process(target=_runBootstrap,
                                        args=(args, queue,))
            p.start()
            processes.append(p)
        # Per-process budget scales with the number of iterations it runs
        timeout = MAX_ITERATION_TIME * numProcessIteration
        try:
            # Collect one result per worker, bounded by the timeout
            for _ in range(len(processes)):
                results.append(queue.get(timeout=timeout))
            # Get rid of possible zombies
            for process in processes:
                process.terminate()
        except Exception as err:
            msg = "modelFitterBootstrap/Error in process management"
            self.logger.error(msg, err)
    else:
        # Keep to debug _runBootstrap single threaded
        results = []
        for args in args_list:
            results.append(_runBootstrap(args))
    if len(results) == 0:
        # BUG FIX: these fragments were previously assigned with "=",
        # so only the last line was ever logged; also fixed "Curent" typo.
        msg = "modelFitterBootstrap/timeout in solving model."
        msg += "\nConsider increasing per timeout."
        msg += "\nCurrent value: %f" % MAX_ITERATION_TIME
        self.logger.result(msg)
    else:
        self.bootstrapResult = BootstrapResult.merge(results)
        # Update the logger in place
        _ = _helpers.copyObject(self.bootstrapResult.fitter.logger,
                                self.logger)
        if self.bootstrapResult.fittedStatistic is not None:
            self.bootstrapResult.fittedStatistic.calculate()
        self.logger.result("%d bootstrap estimates of parameters."
                           % self.bootstrapResult.numSimulation)
        if serializePath is not None:
            self.serialize(serializePath)
def bootstrap(self,
        isParallel=True,
        # The following must be kept in sync with ModelFitterCore.__init__
        numIteration: int = None,
        maxProcess: int = None,
        serializePath: str = None,
        **kwargs: dict):
    """
    Constructs a bootstrap estimate of parameter values.

    Parameters
    ----------
    isParallel: bool
        run in parallel
    numIteration: number of bootstrap iterations
    maxProcess: Maximum number of processes to use. Default: numCPU
    serializePath: Where to serialize the fitter after bootstrap
    kwargs: arguments passed to ObservationSynthesizer

    Example
    -------
        f.bootstrap()
        f.getParameterMeans()  # Mean values
        f.getParameterStds()  # standard deviations

    Notes
    -----
    1. Arguments can be overriden by the constructor using
       the keyword argument bootstrapKwargs.
    """
    def getValue(name, value, defaultValue=None):
        # Resolution order: explicit argument, constructor bootstrapKwargs,
        # instance attribute, then the supplied default.
        if value is not None:
            return value
        if self.bootstrapKwargs.get(name) is not None:
            return self.bootstrapKwargs[name]
        if name in self.__dict__.keys():
            return self.__dict__[name]
        return defaultValue
    #
    # Initialization
    # NOTE(review): numIteration may still be None here if it was never
    # supplied via argument, bootstrapKwargs, or an instance attribute —
    # confirm callers always provide it.
    numIteration = getValue("numIteration", numIteration)
    isParallel = getValue("_isParallel", isParallel)
    isProgressBar = getValue("_isProgressBar", None, defaultValue=True)
    if maxProcess is None:
        maxProcess = self._maxProcess
    if maxProcess is None:
        maxProcess = multiprocessing.cpu_count()
    serializePath = getValue("serializePath", serializePath)
    # Ensure that there is a fitted model
    if self.minimizerResult is None:
        self.fitModel()
    # Construct arguments collection.
    # Ceiling division so the batches total at least numIteration
    # (floor division previously dropped the remainder, e.g. 10
    # iterations over 4 processes ran only 8).
    numProcess = min(maxProcess, numIteration)
    batchSize = (numIteration + numProcess - 1) // numProcess
    argumentsCol = [RunnerArgument(self,
                                   numIteration=batchSize,
                                   _loggerPrefix="bootstrap",
                                   **kwargs)
                    for i in range(numProcess)]
    # Run separate processes for each batch
    runner = ParallelRunner(BootstrapRunner,
                            desc="iteration",
                            maxProcess=numProcess)
    results = runner.runSync(argumentsCol,
                             isParallel=isParallel,
                             isProgressBar=isProgressBar)
    # Check the results
    if len(results) == 0:
        # BUG FIX: these fragments were previously assigned with "=",
        # so only the last line was ever logged; also fixed "Curent" typo.
        msg = "modelFitterBootstrap/timeout in solving model."
        msg += "\nConsider increasing per timeout."
        msg += "\nCurrent value: %f" % MAX_ITERATION_TIME
        self.logger.result(msg)
    else:
        self.bootstrapResult = BootstrapResult.merge(results, self)
        # Update the logger in place
        _ = _helpers.copyObject(self.bootstrapResult.fitter.logger,
                                self.logger)
        # Guard against a missing statistic (consistent with the
        # earlier implementation of this method)
        if self.bootstrapResult.fittedStatistic is not None:
            self.bootstrapResult.fittedStatistic.calculate()
        self.logger.result("%d bootstrap estimates of parameters."
                           % self.bootstrapResult.numSimulation)
        if serializePath is not None:
            self.serialize(serializePath)