def mkNullResult():
    """Build an empty BootstrapResult: zero iterations, no parameter samples."""
    # NOTE(review): `self` is referenced but is not a parameter and is not
    # defined here -- this only works if the function is actually a method or
    # a closure nested inside a method of a fitter-holding test class. Confirm
    # the enclosing scope before reusing this at module level.
    fittedStatistic = TimeseriesStatistic(
        self.fitter.observedTS[self.fitter.selectedColumns])
    return BootstrapResult(self.fitter, 0, {}, fittedStatistic)
class TestBootstrapResult(unittest.TestCase):
    """Tests for br.BootstrapResult built on the shared FITTER fixture."""

    def setUp(self):
        self.fitter = FITTER
        self.speciesNames = list(th.VARIABLE_NAMES)
        self.parameterNames = list(th.PARAMETER_DCT.keys())
        # One random integer sample per iteration for every parameter
        self.parameterDct = {
            pname: np.random.randint(10, 20, NUM_ITERATION)
            for pname in self.parameterNames}
        # Accumulate NUM_ITERATION noisy copies of the fitted timeseries
        self.fittedStatistic = TimeseriesStatistic(self.fitter.fittedTS)
        for _ in range(NUM_ITERATION):
            noisyTS = self.fitter.fittedTS.copy()
            for species in self.speciesNames:
                noisyTS[species] += 3 * np.random.random(
                    len(self.fitter.fittedTS))
            self.fittedStatistic.accumulate(noisyTS)
        self.bootstrapResult = br.BootstrapResult(
            self.fitter, NUM_ITERATION, self.parameterDct,
            self.fittedStatistic)

    def testConstructor(self):
        if IGNORE_TEST:
            return
        # Std-deviation dict keys must exactly match the parameter set
        stdKeys = set(self.bootstrapResult.parameterStdDct.keys())
        diff = stdKeys.symmetric_difference(self.bootstrapResult.parameters)
        self.assertEqual(len(diff), 0)

    def testParams(self):
        if IGNORE_TEST:
            return
        firstName = self.parameterNames[0]
        valueDct = self.bootstrapResult.params.valuesdict()
        # Reported parameter value is the mean of the bootstrap samples
        self.assertEqual(valueDct[firstName],
                         np.mean(self.parameterDct[firstName]))

    def testMerge(self):
        if IGNORE_TEST:
            return
        otherResult = br.BootstrapResult(
            self.fitter, NUM_ITERATION, self.parameterDct,
            self.fittedStatistic)
        merged = br.BootstrapResult.merge(
            [self.bootstrapResult, otherResult], self.fitter)
        # Iterations and per-parameter sample counts should both double
        self.assertEqual(merged.numIteration, 2 * NUM_ITERATION)
        firstName = self.parameterNames[0]
        self.assertEqual(len(merged.parameterDct[firstName]),
                         merged.numIteration)

    def testSimulate(self):
        if IGNORE_TEST:
            return
        self.bootstrapResult.setFitter(self.fitter)
        statistic = self.bootstrapResult.simulate()
        lowerArr = statistic.percentileDct[bsr.PERCENTILES[0]].flatten()
        upperArr = statistic.percentileDct[bsr.PERCENTILES[-1]].flatten()
        # Lowest percentile must never exceed the highest percentile
        self.assertTrue(
            all(lo <= up for lo, up in zip(lowerArr, upperArr)))

    def testRpickleInterface(self):
        if IGNORE_TEST:
            return
        # Round-trip through serialization preserves the mean timeseries
        restored = rpickle.Serialization(self.bootstrapResult).deserialize()
        self.assertTrue(
            restored.fittedStatistic.meanTS.equals(
                self.bootstrapResult.fittedStatistic.meanTS))
class TestNamedTimeseries(unittest.TestCase):
    """Tests for TimeseriesStatistic over SIMPLE_TS / UNIFORM_TS fixtures."""

    def setUp(self):
        self.timeseries = SIMPLE_TS
        self.statistic = TimeseriesStatistic(self.timeseries)

    def testConstructor(self):
        if IGNORE_TEST:
            return
        # Statistic columns are the timeseries columns minus TIME
        expected = list(COLNAMES)
        expected.remove(TIME)
        diff = set(self.statistic.colnames).symmetric_difference(expected)
        self.assertEqual(len(diff), 0)

    def testAccumulate(self):
        if IGNORE_TEST:
            return
        self.statistic.accumulate(SIMPLE_TS)
        # After one accumulation the running sum equals the input
        self.assertTrue(self.statistic.sumTS.equals(SIMPLE_TS))

    def testCalculate1(self):
        if IGNORE_TEST:
            return
        for _ in range(SIMPLE_CNT):
            self.statistic.accumulate(SIMPLE_TS)
        self.statistic.calculate()
        self.assertEqual(self.statistic.count, SIMPLE_CNT)
        # Identical accumulations: mean is the input itself...
        self.assertTrue(self.statistic.meanTS.equals(SIMPLE_TS))
        # ...and the standard deviation is zero everywhere
        zeroTS = SIMPLE_TS.copy(isInitialize=True)
        self.assertTrue(self.statistic.stdTS.equals(zeroTS))

    def mkStatistics(self, count):
        """Build `count` statistics, each over UNIFORM_CNT random series."""
        statistics = []
        for _ in range(count):
            newStatistic = TimeseriesStatistic(UNIFORM_TS)
            for _ in range(UNIFORM_CNT):
                newStatistic.accumulate(
                    mkTimeseries(UNIFORM_LEN, COLNAMES, isRandom=True))
            statistics.append(newStatistic)
        return statistics

    def evaluateStatistic(self, statistic, count=1):
        """Check mean, std, and percentiles against the uniform distribution."""
        statistic.calculate()
        self.assertEqual(statistic.count, count * UNIFORM_CNT)
        meanValue = np.mean(statistic.meanTS.flatten())
        self.assertLess(np.abs(meanValue - UNIFORM_MEAN), 0.1)
        stdValue = np.mean(statistic.stdTS.flatten())
        self.assertLess(np.abs(stdValue - UNIFORM_STD), 0.1)
        # For U(0,1) the p-th percentile is p/100
        for percentile in self.statistic.percentiles:
            value = np.mean(statistic.percentileDct[percentile].flatten())
            self.assertLess(np.abs(value - 0.01 * percentile), 0.01)

    def testCalculate2(self):
        if IGNORE_TEST:
            return
        self.evaluateStatistic(self.mkStatistics(1)[0])

    def testEquals(self):
        if IGNORE_TEST:
            return
        other = TimeseriesStatistic(self.timeseries)
        self.assertTrue(self.statistic.equals(other))
        # Accumulating into one of them breaks equality
        other.accumulate(SIMPLE_TS)
        self.assertFalse(self.statistic.equals(other))

    def testCopy(self):
        if IGNORE_TEST:
            return
        duplicate = self.statistic.copy()
        self.assertTrue(self.statistic.equals(duplicate))
        # NOTE(review): the next assertion compares a statistic with itself
        # and so can never fail; a copy comparison was probably intended.
        duplicate = self.mkStatistics(1)[0]
        self.assertTrue(duplicate.equals(duplicate))

    def testMerge(self):
        if IGNORE_TEST:
            return
        NUM = 4
        merged = TimeseriesStatistic.merge(self.mkStatistics(NUM))
        merged.calculate()
        self.evaluateStatistic(merged, count=NUM)

    def testRpickleInterface(self):
        if IGNORE_TEST:
            return
        # Round-trip through serialization preserves the mean timeseries
        restored = rpickle.Serialization(self.statistic).deserialize()
        self.assertTrue(restored.meanTS.equals(self.statistic.meanTS))
def setUp(self):
    # Fresh statistic over the shared SIMPLE_TS fixture for each test.
    # NOTE(review): this duplicates TestNamedTimeseries.setUp; the enclosing
    # class header is not visible here -- confirm which class it belongs to.
    self.timeseries = SIMPLE_TS
    self.statistic = TimeseriesStatistic(self.timeseries)
def _runBootstrap(arguments: _Arguments, queue=None) -> BootstrapResult:
    """
    Executes bootstrapping.

    Parameters
    ----------
    arguments: inputs to bootstrap
    queue: multiprocessing.Queue
        If provided, the BootstrapResult is put on the queue instead of
        being returned (used when running as a worker process).

    Returns
    -------
    BootstrapResult or None
        Returned only when ``queue`` is None; otherwise the result is
        delivered via ``queue.put``.

    Notes
    -----
    1. Only the first process generates progress reports.
    2. Uses METHOD_LEASTSQ for fitModel iterations.
    """
    fitter = arguments.fitter
    logger = fitter.logger
    mainBlock = Logger.join(arguments._loggerPrefix, "_runBootstrap")
    mainGuid = logger.startBlock(mainBlock)
    # Unpack arguments
    isSuccess = False
    lastErr = ""
    # Do an initial fit; retry up to MAX_TRIES times, remembering the last error
    for _ in range(MAX_TRIES):
        try:
            fitter.fitModel()  # Initialize model
            isSuccess = True
            break
        except Exception as err:
            lastErr = err
    # Set up logging for this process: redirect stdout/stderr into the
    # logger's file so worker-process output is captured
    fd = logger.getFileDescriptor()
    processIdx = arguments.processIdx
    if fd is not None:
        sys.stderr = logger.getFileDescriptor()
        sys.stdout = logger.getFileDescriptor()
    iterationGuid = None
    if not isSuccess:
        # Initial fit never succeeded: report and return a null result
        # NOTE(review): "Bootstrip" is a typo in this log label; left
        # unchanged since it is a runtime string.
        msg = "Process %d/modelFitterBootstrip/_runBootstrap" % processIdx
        logger.error(msg, lastErr)
        fittedStatistic = TimeseriesStatistic(fitter.observedTS,
                                              percentiles=[])
        bootstrapResult = BootstrapResult(fitter, 0, {}, fittedStatistic)
    else:
        numIteration = arguments.numIteration
        reportInterval = arguments.reportInterval
        processingRate = min(arguments.numProcess,
                             multiprocessing.cpu_count())
        cols = fitter.selectedColumns
        # Synthesizer generates resampled observations for each iteration
        synthesizer = arguments.synthesizerClass(
            observedTS=fitter.observedTS.subsetColumns(cols),
            fittedTS=fitter.fittedTS.subsetColumns(cols),
            **arguments.kwargs)
        # Initialize
        parameterDct = {p: [] for p in fitter.parametersToFit}
        numSuccessIteration = 0
        lastReport = 0
        if fitter.minimizerResult is None:
            fitter.fitModel()
        # Baseline chi-square used to reject outlier fits below
        baseChisq = fitter.minimizerResult.redchi
        # Do the bootstrap iterations; allow ITERATION_MULTIPLIER extra
        # attempts so failed fits can be retried
        bootstrapError = 0
        iterationBlock = Logger.join(mainBlock, "Iteration")
        for iteration in range(numIteration * ITERATION_MULTIPLIER):
            if iterationGuid is not None:
                logger.endBlock(iterationGuid)
            iterationGuid = logger.startBlock(iterationBlock)
            newObservedTS = synthesizer.calculate()
            fittingSetupBlock = Logger.join(iterationBlock, "fittingSetup")
            fittingSetupGuid = logger.startBlock(fittingSetupBlock)
            # Fresh fitter on the synthesized observations, configured
            # from the parent fitter's settings
            newFitter = ModelFitterBootstrap(
                fitter.roadrunnerModel,
                newObservedTS,
                fitter.parametersToFit,
                selectedColumns=fitter.selectedColumns,
                # Use bootstrap methods for fitting
                fitterMethods=fitter._bootstrapMethods,
                parameterLowerBound=fitter.lowerBound,
                parameterUpperBound=fitter.upperBound,
                fittedDataTransformDct=fitter.fittedDataTransformDct,
                logger=logger,
                _loggerPrefix=iterationBlock,
                isPlot=fitter._isPlot)
            fittedStatistic = TimeseriesStatistic(newFitter.observedTS,
                                                  percentiles=[])
            logger.endBlock(fittingSetupGuid)
            try:
                # Progress reporting -- only process 0 reports
                if (iteration > 0) and (iteration != lastReport) \
                        and (processIdx == 0):
                    # Scale by processingRate to estimate totals across
                    # all worker processes
                    totalSuccessIteration = numSuccessIteration \
                        * processingRate
                    totalIteration = iteration * processingRate
                    if totalIteration % reportInterval == 0:
                        msg = "Bootstrap completed %d total iterations "
                        msg += "with %d successes."
                        msg = msg % (totalIteration, totalSuccessIteration)
                        fitter.logger.status(msg)
                        # NOTE(review): nesting reconstructed from collapsed
                        # source -- confirm lastReport is updated only when
                        # a report is emitted.
                        lastReport = numSuccessIteration
                if numSuccessIteration >= numIteration:
                    # Performed the iterations
                    break
                tryBlock = Logger.join(iterationBlock, "try")
                tryGuid = logger.startBlock(tryBlock)
                try:
                    tryFitterBlock = Logger.join(tryBlock, "Fitter")
                    tryFitterGuid = logger.startBlock(tryFitterBlock)
                    newFitter.fitModel(params=fitter.params)
                    logger.endBlock(tryFitterGuid)
                except Exception as err:
                    # Problem with the fit. Don't count it as a success.
                    msg = "Process %d/modelFitterBootstrap" % processIdx
                    msg += " Fit failed on iteration %d." % iteration
                    fitter.logger.error(msg, err)
                    logger.endBlock(tryGuid)
                    continue
                # Reject fits whose chi-square is far above the baseline
                if newFitter.minimizerResult.redchi \
                        > MAX_CHISQ_MULT * baseChisq:
                    if IS_REPORT:
                        msg = "Process %d: Fit has high chisq: %2.2f on iteration %d."
                        fitter.logger.exception(
                            msg % (processIdx,
                                   newFitter.minimizerResult.redchi,
                                   iteration))
                    logger.endBlock(tryGuid)
                    continue
                # NOTE(review): this continue skips endBlock(tryGuid),
                # leaving the "try" log block open -- confirm intended.
                if newFitter.params is None:
                    continue
                # Successful iteration: record parameter values and
                # accumulate the fitted timeseries
                numSuccessIteration += 1
                dct = newFitter.params.valuesdict()
                # Side-effect comprehension: appends each fitted value
                [
                    parameterDct[p].append(dct[p])
                    for p in fitter.parametersToFit
                ]
                cols = newFitter.fittedTS.colnames
                fittedStatistic.accumulate(newFitter.fittedTS)
                newFitter.observedTS = synthesizer.calculate()
                logger.endBlock(tryGuid)
            except Exception as err:
                # Unexpected failure in the iteration body; count it
                msg = "Process %d/modelFitterBootstrap" % processIdx
                msg += " Error on iteration %d." % iteration
                fitter.logger.error(msg, err)
                bootstrapError += 1
        fitter.logger.status("Process %d: completed bootstrap."
                             % (processIdx + 1))
        bootstrapResult = BootstrapResult(fitter, numSuccessIteration,
                                          parameterDct, fittedStatistic,
                                          bootstrapError=bootstrapError)
    # Close any open log blocks and restore/close the log file descriptor
    if iterationGuid is not None:
        logger.endBlock(iterationGuid)
    logger.endBlock(mainGuid)
    if fd is not None:
        if not fd.closed:
            fd.close()
    # Deliver the result: return directly or via the multiprocessing queue
    if queue is None:
        return bootstrapResult
    else:
        queue.put(bootstrapResult)