Example #1
0
class ModelFitterCore(rpickle.RPickler):
    def __init__(
        self,
        modelSpecification,
        observedData,
        parametersToFit=None,
        selectedColumns=None,
        fitterMethods=METHOD_FITTER_DEFAULTS,
        numFitRepeat=1,
        bootstrapMethods=METHOD_BOOTSTRAP_DEFAULTS,
        parameterLowerBound=PARAMETER_LOWER_BOUND,
        parameterUpperBound=PARAMETER_UPPER_BOUND,
        parameterDct={},
        fittedDataTransformDct={},
        logger=Logger(),
        isPlot=True,
        _loggerPrefix="",
        # The following must be kept in sync with ModelFitterBootstrap.bootstrap
        numIteration: int = 10,
        reportInterval: int = 1000,
        synthesizerClass=ObservationSynthesizerRandomizedResiduals,
        maxProcess: int = None,
        serializePath: str = None,
    ):
        """
        Constructs estimates of parameter values.

        Parameters
        ----------
        modelSpecification: ExtendedRoadRunner/str
            roadrunner model or antimony model
            If None, no instance state is initialized (this is the
            form used by rpConstruct).
        observedData: NamedTimeseries/str
            str: path to CSV file
        parametersToFit: list-str/None
            parameters in the model that you want to fit
            if None, the keys of parameterDct are used
        selectedColumns: list-str
            species names you wish use to fit the model
            default: all columns in observedData
        parameterLowerBound: float
            lower bound for the fitting parameters
        parameterUpperBound: float
            upper bound for the fitting parameters
        parameterDct: dict
            key: parameter name
            value: ParameterSpecification or a
                   triple - (lowerRange, startingValue, upperRange)
            NOTE(review): _updateParameterDct reads a tuple as
            (lower, upper, value), which disagrees with the order
            documented here - confirm the intended order.
        fittedDataTransformDct: dict
            key: column in selectedColumns
            value: function of the data in selectedColumns;
                   input: NamedTimeseries
                   output: array for the values of the column
        logger: Logger
        fitterMethods: str/list-str
            method used for minimization in fitModel
        numFitRepeat: int
            number of times fitting is repeated for a method
        bootstrapMethods: str/list-str
            method used for minimization in bootstrap
        numIteration: number of bootstrap iterations
        reportInterval: number of iterations between progress reports
        synthesizerClass: object that synthesizes new observations
            Must subclass ObservationSynthesizer
            NOTE(review): accepted but not stored by this constructor.
        maxProcess: Maximum number of processes to use. Default: numCPU
        serializePath: Where to serialize the fitter after bootstrap

        Usage
        -----
        parameterDct = {
            "k1": (1, 5, 10),  # name of parameter: low value, initial, high
            "k2": (2, 3, 6)}
        fitter = ModelFitter(roadrunnerModel, "observed.csv",
            parameterDct=parameterDct)
        fitter.fitModel()  # Do the fit
        fitter.bootstrap()  # Estimate parameter variance with bootstrap
        """
        if modelSpecification is None:
            # Default constructor (see rpConstruct): leave the instance empty
            return
        self._loggerPrefix = _loggerPrefix
        self.modelSpecification = modelSpecification
        self.parametersToFit = parametersToFit
        self.lowerBound = parameterLowerBound
        self.upperBound = parameterUpperBound
        self.bootstrapKwargs = dict(
            numIteration=numIteration,
            reportInterval=reportInterval,
            maxProcess=maxProcess,
            serializePath=serializePath,
        )
        # Tolerate None so that callers need not build an empty dict
        if parameterDct is None:
            parameterDct = {}
        self.parameterDct = self._updateParameterDct(parameterDct)
        self._numFitRepeat = numFitRepeat
        if self.parametersToFit is None:
            self.parametersToFit = list(self.parameterDct)
        self.observedTS = observedData
        if self.observedTS is not None:
            self.observedTS = mkNamedTimeseries(observedData)
        # Keep a private copy so that the shared default dictionary (or the
        # caller's dictionary) is never aliased by the instance.
        if fittedDataTransformDct is None:
            self.fittedDataTransformDct = None
        else:
            self.fittedDataTransformDct = dict(fittedDataTransformDct)
        #
        if (selectedColumns is None) and (self.observedTS is not None):
            selectedColumns = self.observedTS.colnames
        self.selectedColumns = selectedColumns
        # Construct array of non-nan observed values. There is nothing to
        # flatten when no observed data were supplied.
        if self.observedTS is None:
            self._observedArr = None
        else:
            self._observedArr = self.observedTS[self.selectedColumns].flatten()
        # Other internal state
        self._fitterMethods = fitterMethods
        if isinstance(self._fitterMethods, str):
            if self._fitterMethods == METHOD_BOTH:
                self._fitterMethods = METHOD_FITTER_DEFAULTS
            else:
                self._fitterMethods = [self._fitterMethods]
        self._bootstrapMethods = bootstrapMethods
        if isinstance(self._bootstrapMethods, str):
            self._bootstrapMethods = [self._bootstrapMethods]
        self._isPlot = isPlot
        self._plotter = tp.TimeseriesPlotter(isPlot=self._isPlot)
        self._plotFittedTS = None  # Timeseries that is plotted
        self.logger = logger
        # The following are calculated during fitting
        self.roadrunnerModel = None
        self.minimizer = None  # lmfit.minimizer
        self.minimizerResult = None  # Results of minimization
        self.params = None  # params property in lmfit.minimizer
        if self.observedTS is None:
            self.fittedTS = None
        else:
            self.fittedTS = self.observedTS.copy(
                isInitialize=True)  # Initialize
        self.residualsTS = None  # Residuals for selectedColumns
        self.bootstrapResult = None  # Result from bootstrapping
        # Validation checks
        self._validateFittedDataTransformDct()

    @classmethod
    def rpConstruct(cls):
        """
        Overrides rpickler.rpConstruct to create a method that
        constructs an instance without arguments.
        
        Returns
        -------
        Instance of cls
        """
        return cls(None, None, None)

    def rpRevise(self):
        """
        Overrides rpickler.
        """
        if not "logger" in self.__dict__.keys():
            self.logger = Logger()

    def _validateFittedDataTransformDct(self):
        if self.fittedDataTransformDct is not None:
            keySet = set(self.fittedDataTransformDct.keys())
            selectedColumnsSet = self.selectedColumns
            if (keySet is not None) and (selectedColumnsSet is not None):
                excess = set(keySet).difference(selectedColumnsSet)
                if len(excess) > 0:
                    msg = "Columns not in selectedColumns: %s" % str(excess)
                    raise ValueError(excess)

    def _transformFittedTS(self, data):
        """
        Builds a timeseries from simulation data and applies the
        transformations in fittedDataTransformDct to its columns.

        Parameters
        ----------
        data: np.ndarray
            first column is time, followed by selectedColumns

        Results
        ----------
        NamedTimeseries
        """
        columnNames = [TIME] + list(self.selectedColumns)
        transformedTS = NamedTimeseries(array=data[:, :],
                                        colnames=columnNames)
        transformDct = self.fittedDataTransformDct
        if transformDct is None:
            return transformedTS
        for column in transformDct:
            transformFunc = transformDct[column]
            if transformFunc is None:
                continue
            # The function sees the whole timeseries, including columns
            # already replaced by earlier transformations.
            transformedTS[column] = transformFunc(transformedTS)
        return transformedTS

    def _updateParameterDct(self, parameterDct):
        """
        Handles values that are tuples instead of ParameterSpecification.
        """
        dct = dict(parameterDct)
        for name, value in parameterDct.items():
            if isinstance(value, tuple):
                dct[name] = ParameterSpecification(lower=value[0],
                                                   upper=value[1],
                                                   value=value[2])
        return dct

    @staticmethod
    def addParameter(parameterDct: dict, name: str, lower: float, upper: float,
                     value: float):
        """
        Adds a parameter to a dictionary of parameters. The dictionary
        is updated in place and also returned.

        Parameters
        ----------
        parameterDct: parameter dictionary to augment
        name: parameter name
        lower: lower range of parameter value
        upper: upper range of parameter value
        value: initial value

        Returns
        -------
        dict
            the same object as parameterDct
        """
        parameterDct[name] = ParameterSpecification(lower=lower,
                                                    upper=upper,
                                                    value=value)
        # Return the dictionary as documented (previously returned None)
        return parameterDct

    def _adjustNames(self, antimonyModel:str, observedTS:NamedTimeseries)  \
          ->typing.Tuple[NamedTimeseries, list]:
        """
        Antimony exports can change the names of floating species
        by adding a "_" at the end. Check for this and adjust
        the names in observedTS.

        Parameters
        ----------
        antimonyModel: antimony source that is loaded to obtain the
            column names produced by a simulation
        observedTS: observed data whose column names are checked

        Return
        ------
        NamedTimeseries: newObservedTS
            observedTS itself if no name is missing, else a renamed copy
        list: newSelectedColumns
            copy of selectedColumns with renamed columns moved to the end
        """
        rr = te.loada(antimonyModel)
        dataNames = rr.simulate().colnames
        # Simulation output names species concentrations as "[name]"
        bracketedNames = ["[%s]" % n for n in observedTS.colnames]
        missingNames = [n[1:-1]
                        for n in set(bracketedNames).difference(dataNames)]
        newSelectedColumns = list(self.selectedColumns)
        if not missingNames:
            return observedTS, newSelectedColumns
        newObservedTS = observedTS.copy()
        self.logger.exception("Missing names in antimony export: %s" %
                              str(missingNames))
        for name in observedTS.colnames:
            if name not in missingNames:
                continue
            adjustedName = "%s_" % name
            newObservedTS = newObservedTS.rename(name, adjustedName)
            # An observed column need not be a selected column; only
            # rewrite the selection when the name is actually in it.
            if name in newSelectedColumns:
                newSelectedColumns.remove(name)
                newSelectedColumns.append(adjustedName)
        return newObservedTS, newSelectedColumns

    def copy(self, isKeepLogger=False):
        """
        Creates a copy of the model fitter.
        Preserves the user-specified settings and the results
        of bootstrapping.

        Parameters
        ----------
        isKeepLogger: bool
            True: the copy shares this fitter's logger
            False: the copy gets a copy of the logger (or None)

        Returns
        -------
        Instance of the same class as self

        Raises
        ------
        ValueError: the model cannot be converted to Antimony
        """
        if not isinstance(self.modelSpecification, str):
            try:
                modelSpecification = self.modelSpecification.getAntimony()
            except Exception as err:
                self.logger.error(
                    "Problem wth conversion to Antimony. Details:", err)
                # Chain the cause so the original failure stays visible
                raise ValueError("Cannot proceed.") from err
            observedTS, selectedColumns = self._adjustNames(
                modelSpecification, self.observedTS)
        else:
            modelSpecification = self.modelSpecification
            observedTS = self.observedTS.copy()
            # Copy the list so the two fitters do not share it
            selectedColumns = list(self.selectedColumns)
        #
        if isKeepLogger:
            logger = self.logger
        elif self.logger is not None:
            logger = self.logger.copy()
        else:
            logger = None
        newModelFitter = self.__class__(
            copy.deepcopy(modelSpecification),
            observedTS,
            copy.deepcopy(self.parametersToFit),
            selectedColumns=selectedColumns,
            fitterMethods=self._fitterMethods,
            # numFitRepeat is a user setting and must survive the copy
            numFitRepeat=self._numFitRepeat,
            bootstrapMethods=self._bootstrapMethods,
            parameterLowerBound=self.lowerBound,
            parameterUpperBound=self.upperBound,
            parameterDct=copy.deepcopy(self.parameterDct),
            fittedDataTransformDct=copy.deepcopy(self.fittedDataTransformDct),
            logger=logger,
            isPlot=self._isPlot)
        if self.bootstrapResult is not None:
            newModelFitter.bootstrapResult = self.bootstrapResult.copy()
            newModelFitter.params = newModelFitter.bootstrapResult.params
        else:
            newModelFitter.bootstrapResult = None
            newModelFitter.params = self.params
        return newModelFitter

    def _initializeRoadrunnerModel(self):
        """
        Sets self.roadrunnerModel from self.modelSpecification.
        An ExtendedRoadRunner is used as is; a string is loaded
        as an Antimony model.

        Raises
        ------
        ValueError: modelSpecification is neither of the above
        """
        specification = self.modelSpecification
        if isinstance(specification,
                      te.roadrunner.extended_roadrunner.ExtendedRoadRunner):
            self.roadrunnerModel = specification
        elif isinstance(specification, str):
            self.roadrunnerModel = te.loada(specification)
        else:
            # The message previously lacked the "or" between alternatives
            msg = 'Invalid model.'
            msg = msg + "\nA model must either be a Roadrunner model "
            msg = msg + "or an Antimony model."
            raise ValueError(msg)

    def getDefaultParameterValues(self):
        """
        Obtain the original values of parameters.
        
        Returns
        -------
        dict:
            key: parameter name
            value: value of parameter
        """
        dct = {}
        self._initializeRoadrunnerModel()
        self.roadrunnerModel.reset()
        for parameterName in self.parametersToFit:
            dct[parameterName] = self.roadrunnerModel.model[parameterName]
        return dct

    def simulate(self,
                 params=None,
                 startTime=None,
                 endTime=None,
                 numPoint=None):
        """
        Runs a simulation of the roadrunner model. The time span and
        number of points default to those of the observed data. The
        model is reset first; if params is given, the model parameters
        are then set from it.

        Parameters
        ----------
        params: lmfit.Parameters
        startTime: float
        endTime: float
        numPoint: int

        Return
        ------
        NamedTimeseries
        """
        def orDefault(default, given):
            # An explicitly supplied argument wins over the default
            return default if given is None else given

        log = self.logger
        # Outermost performance block
        block = Logger.join(self._loggerPrefix, "fitModel.simulate")
        guid = log.startBlock(block)
        # Block sub1: preparation before the simulation
        prepBlock = Logger.join(block, "sub1")
        prepGuid = log.startBlock(prepBlock)
        startTime = orDefault(self.observedTS.start, startTime)
        endTime = orDefault(self.observedTS.end, endTime)
        numPoint = orDefault(len(self.observedTS), numPoint)
        # Block sub1a: make sure there is a freshly reset model
        resetBlock = Logger.join(prepBlock, "sub1a")
        resetGuid = log.startBlock(resetBlock)
        if self.roadrunnerModel is None:
            self._initializeRoadrunnerModel()
        self.roadrunnerModel.reset()
        log.endBlock(resetGuid)
        # Block sub1b: apply the parameter values, if any were given
        setupBlock = Logger.join(prepBlock, "sub1b")
        setupGuid = log.startBlock(setupBlock)
        if params is not None:
            self._setupModel(params)
        log.endBlock(setupGuid)
        # Time must be among the simulated columns
        simulatedColumns = list(self.selectedColumns)
        if TIME not in simulatedColumns:
            simulatedColumns.insert(0, TIME)
        log.endBlock(prepGuid)
        # Block roadrunner: the simulation itself
        roadrunnerBlock = Logger.join(block, "roadrunner")
        roadrunnerGuid = log.startBlock(roadrunnerBlock)
        data = self.roadrunnerModel.simulate(startTime, endTime, numPoint,
                                             simulatedColumns)
        log.endBlock(roadrunnerGuid)
        # Block sub2: wrap the result
        wrapBlock = Logger.join(block, "sub2")
        wrapGuid = log.startBlock(wrapBlock)
        fittedTS = NamedTimeseries(namedArray=data)
        log.endBlock(wrapGuid)
        log.endBlock(guid)
        return fittedTS

    def updateFittedAndResiduals(self, **kwargs) -> np.ndarray:
        """
        Updates values of self.fittedTS and self.residualsTS
        by running a simulation with the supplied arguments.

        Parameters
        ----------
        kwargs: dict
            arguments for simulation

        Instance Variables Updated
        --------------------------
        self.fittedTS
        self.residualsTS

        Returns
        -------
        1-d ndarray of residuals
            flattened values of residualsTS for selectedColumns
        """
        self.fittedTS = self.simulate(**kwargs)  # Updates self.fittedTS
        cols = self.selectedColumns
        if self.residualsTS is None:
            self.residualsTS = self.observedTS.subsetColumns(cols)
        self.residualsTS[cols] = self.observedTS[cols] - self.fittedTS[cols]
        # Missing observations yield nan residuals; treat them as zero
        for col in cols:
            self.residualsTS[col] = np.nan_to_num(self.residualsTS[col])
        # Return the residuals as documented (previously returned None)
        return self.residualsTS[cols].flatten()

    def _residuals(self, params) -> np.ndarray:
        """
        Compute the residuals between observed and simulated data.
        nan values are replaced by np.nan_to_num. This internal-only
        method is implemented to maximize efficiency: it works on
        flattened arrays and requires that self.roadrunnerModel has
        already been initialized.

        Parameters
        ----------
        params: lmfit.Parameters
            parameter values applied to the model before simulating

        Returns
        -------
        1-d ndarray of residuals
        """
        block = Logger.join(self._loggerPrefix, "fitModel._residuals")
        guid = self.logger.startBlock(block)
        self.roadrunnerModel.reset()
        self._setupModel(params)
        # Time the simulation separately
        roadrunnerBlock = Logger.join(block, "roadrunner")
        roadrunnerGuid = self.logger.startBlock(roadrunnerBlock)
        data = self.roadrunnerModel.simulate(self.observedTS.start,
                                             self.observedTS.end,
                                             len(self.observedTS),
                                             self.selectedColumns)
        self.logger.endBlock(roadrunnerGuid)
        # Time the array arithmetic separately
        tailBlock = Logger.join(block, "tail")
        tailGuid = self.logger.startBlock(tailBlock)
        residualsArr = np.nan_to_num(self._observedArr - data.flatten())
        self.logger.endBlock(tailGuid)
        self.logger.endBlock(guid)
        return residualsArr

    def fitModel(self, params: lmfit.Parameters = None, max_nfev: int = 100):
        """
        Fits the model by adjusting values of parameters based on
        differences between simulated and provided values of
        floating species. Every fitter method is run numFitRepeat
        times, each run starting from the parameters produced by the
        previous successful run. The result with the smallest standard
        deviation of residuals is kept.

        Parameters
        ----------
        params: starting values of parameters
        max_nfev: maximum number of function evaluations

        Raises
        ------
        ValueError: no minimization succeeded

        Example
        -------
        f.fitModel()
        """
        FitDescriptor = collections.namedtuple(
            "ParameterDescriptor",
            "params method std minimizer minimizerResult")
        block = Logger.join(self._loggerPrefix, "fitModel")
        guid = self.logger.startBlock(block)
        self._initializeRoadrunnerModel()
        if self.parametersToFit is None:
            # Compute fit and residuals for base model
            self.params = None
        else:
            if params is None:
                params = self.mkParams()
            # Best descriptor found so far for each method
            bestByMethod = {}
            for method in self._fitterMethods:
                for _ in range(self._numFitRepeat):
                    minimizer = lmfit.Minimizer(self._residuals,
                                                params,
                                                max_nfev=max_nfev)
                    try:
                        minimizerResult = minimizer.minimize(
                            method=method, max_nfev=max_nfev)
                    except Exception as excp:
                        msg = "Error minimizing for method: %s" % method
                        self.logger.error(msg, excp)
                        continue
                    # The next run starts from these parameters
                    params = minimizerResult.params
                    std = np.std(self._residuals(params))
                    previous = bestByMethod.get(method)
                    if (previous is not None) and (std >= previous.std):
                        continue
                    bestByMethod[method] = FitDescriptor(
                        params=params.copy(),
                        method=method,
                        std=std,
                        minimizer=minimizer,
                        minimizerResult=minimizerResult,
                    )
            if not bestByMethod:
                msg = "*** Minimizer failed for this model and data."
                raise ValueError(msg)
            # Select the result that has the smallest residuals
            rankedMethods = sorted(bestByMethod,
                                   key=lambda m: bestByMethod[m].std)
            best = bestByMethod[rankedMethods[0]]
            self.params = best.params
            self.minimizer = best.minimizer
            self.minimizerResult = best.minimizerResult
        # Ensure that residualsTS and fittedTS match the parameters
        self.updateFittedAndResiduals(params=self.params)
        self.logger.endBlock(guid)

    def getFittedModel(self):
        """
        Provides the roadrunner model with fitted parameters

        Returns
        -------
        ExtendedRoadrunner
        """
        self._checkFit()
        self.roadrunnerModel.reset()
        self._setupModel(self.params)
        return self.roadrunnerModel

    def _setupModel(self, params):
        """
        Sets up the model for use based on the parameter parameters

        Parameters
        ----------
        params: lmfit.Parameters

        """
        pp = params.valuesdict()
        for parameter in self.parametersToFit:
            try:
                self.roadrunnerModel.model[parameter] = pp[parameter]
            except Exception as err:
                msg = "_modelFitterCore/_setupModel: Could not set value for %s"  \
                      % parameter
                self.logger.error(msg, err)

    def mkParams(self, parameterDct: dict = None) -> lmfit.Parameters:
        """
        Constructs lmfit parameters for parametersToFit. A parameter
        with a specification uses its value and bounds, deriving a
        missing bound from the value. A parameter without one uses the
        fitter-wide bounds and starts at their mean.

        Parameters
        ----------
        parameterDct: key=name, value=ParameterSpecification
            default: self.parameterDct

        Returns
        -------
        lmfit.Parameters
        """
        def orScaled(explicit, baseValue, multiplier):
            # Prefer the explicit setting; otherwise scale the base value
            return baseValue * multiplier if explicit is None else explicit

        specificationDct = parameterDct
        if specificationDct is None:
            specificationDct = self.parameterDct
        params = lmfit.Parameters()
        for name in self.parametersToFit:
            if name not in specificationDct:
                # No specification: fall back to the fitter-wide bounds
                midpoint = np.mean([self.lowerBound, self.upperBound])
                params.add(name,
                           value=midpoint,
                           min=self.lowerBound,
                           max=self.upperBound)
                continue
            specification = specificationDct[name]
            value = orScaled(specification.value, specification.value, 1.0)
            if value > 0:
                lowerFactor, upperFactor = (LOWER_PARAMETER_MULT,
                                            UPPER_PARAMETER_MULT)
            else:
                # NOTE(review): same factors as for a positive value;
                # confirm this is intended for non-positive values.
                lowerFactor, upperFactor = (LOWER_PARAMETER_MULT,
                                            UPPER_PARAMETER_MULT)
            lower = orScaled(specification.lower, specification.value,
                             lowerFactor)
            upper = orScaled(specification.upper, specification.value,
                             upperFactor)
            if np.isclose(lower - upper, 0):
                # NOTE(review): upper becomes the absolute value 0.0001,
                # not an offset from lower - confirm.
                upper = 0.0001
            try:
                params.add(name, value=value, min=lower, max=upper)
            except Exception as err:
                msg = "modelFitterCore/mkParams parameterName %s" % name
                self.logger.error(msg, err)
        return params

    def _checkFit(self):
        if self.params is None:
            raise ValueError("Must use fitModel before using this method.")

    def serialize(self, path):
        """
        Writes a copy of the fitter to a file using rpickle.

        Parameters
        ----------
        path: str
            File path
        """
        fitterCopy = self.copy()
        with open(path, "wb") as stream:
            rpickle.dump(fitterCopy, stream)

    @classmethod
    def deserialize(cls, path):
        """
        Reads a fitter from a file written with rpickle and
        initializes its roadrunner model.

        Parameters
        ----------
        path: str
            File path

        Return
        ------
        ModelFitter
            Model is initialized.
        """
        with open(path, "rb") as stream:
            restoredFitter = rpickle.load(stream)
        restoredFitter._initializeRoadrunnerModel()
        return restoredFitter
Example #2
0
class TestLogger(unittest.TestCase):
    """
    Tests for Logger: writing messages to a file at the various log
    levels, performance blocks, joining of block names and copying.
    Each test starts and ends with the files in FILES removed.
    """

    def setUp(self):
        self.remove()
        self.logger = Logger(toFile=LOG_PATH,
                             logPerformance=True,
                             logLevel=logs.LEVEL_MAX)

    def tearDown(self):
        self.remove()

    def remove(self):
        """Deletes the files that the tests may have created."""
        for ffile in FILES:
            if os.path.isfile(ffile):
                os.remove(ffile)

    def isFile(self):
        """Returns True if the log file exists."""
        return os.path.isfile(LOG_PATH)

    def read(self):
        """
        Returns the lines of the log file.

        Raises
        ------
        RuntimeError: the log file does not exist
        """
        if not self.isFile():
            raise RuntimeError("Missing log file.")
        with open(LOG_PATH, "r") as fd:
            return fd.readlines()

    def testConstructor(self):
        if IGNORE_TEST:
            return
        self.assertFalse(self.isFile())
        self.assertEqual(self.logger.logLevel, logs.LEVEL_MAX)

    def testFileDescriptor(self):
        if IGNORE_TEST:
            return
        fd = self.logger.getFileDescriptor()
        self.assertIsInstance(fd, io.TextIOWrapper)
        fd.close()

    def _checkMsg(self, msg):
        """
        Asserts that msg occurs in some line of the log file.

        Returns
        -------
        list-str: lines of the log file
        """
        lines = self.read()
        # Check the message that was passed in, not the global MSG
        self.assertTrue(any(msg in line for line in lines))
        return lines

    def testWrite(self):
        if IGNORE_TEST:
            return
        self.logger._write(MSG, 0)
        _ = self._checkMsg(MSG)

    def _testApi(self, method, logLevel):
        """
        Checks that the Logger method writes MSG when the logger is at
        logLevel and writes nothing more when the logger is at level 0.

        Parameters
        ----------
        method: str
            name of a Logger method that takes a message
        logLevel: int
        """
        if IGNORE_TEST:
            return
        logger = Logger(toFile=LOG_PATH, logLevel=logLevel)
        getattr(logger, method)(MSG)
        line1s = self._checkMsg(MSG)
        # At level 0 the same call must not add lines to the file
        logger = Logger(toFile=LOG_PATH, logLevel=0)
        getattr(logger, method)(MSG)
        line2s = self.read()
        self.assertEqual(len(line1s), len(line2s))

    def testActivity(self):
        if IGNORE_TEST:
            return
        self._testApi("activity", logs.LEVEL_ACTIVITY)

    def testResult(self):
        if IGNORE_TEST:
            return
        self._testApi("result", logs.LEVEL_RESULT)

    def testStatus(self):
        if IGNORE_TEST:
            return
        self._testApi("status", logs.LEVEL_STATUS)

    def testException(self):
        if IGNORE_TEST:
            return
        # Exercise the exception API (previously re-tested "status")
        self._testApi("exception", logs.LEVEL_EXCEPTION)

    def testStartBlock(self):
        if IGNORE_TEST:
            return
        guid = self.logger.startBlock(BLOCK1)
        self.assertLess(guid, BlockSpecification.guid)
        self.assertEqual(len(self.logger.blockDct), 1)

    def testEndBlock(self):
        if IGNORE_TEST:
            return
        guid1 = self.logger.startBlock(BLOCK1)
        guid2 = self.logger.startBlock(BLOCK2)
        self.logger.endBlock(guid2)
        self.logger.endBlock(guid1)
        # BLOCK1 encloses BLOCK2, so it must have taken longer
        self.assertGreater(self.logger.statisticDct[BLOCK1].total,
                           self.logger.statisticDct[BLOCK2].total)

    def testNoLogPerformance(self):
        if IGNORE_TEST:
            return
        logger = Logger(toFile=LOG_PATH,
                        logPerformance=False,
                        logLevel=logs.LEVEL_MAX)
        guid = logger.startBlock(BLOCK1)
        # Inspect the logger under test, not the one made in setUp
        self.assertEqual(len(logger.blockDct), 0)
        logger.endBlock(guid)
        self.assertEqual(len(logger.blockDct), 0)

    def testPerformanceReport(self):
        if IGNORE_TEST:
            return

        def test(numBlock, sleepTime):
            logger = Logger(logPerformance=True)
            for idx in range(numBlock):
                block = "blk_%d" % idx
                guid = logger.startBlock(block)
                time.sleep(sleepTime)
                logger.endBlock(guid)
            df = logger.performanceDF
            self.assertLess(np.abs(sleepTime - df["mean"].mean()), sleepTime)
            self.assertEqual(df["count"].mean(), 1.0)

        #
        test(3, 0.1)
        test(30, 0.1)

    def testJoin(self):
        if IGNORE_TEST:
            return
        NAMES = ["aa", "bbb", "z"]
        result = Logger.join(*NAMES)
        for name in NAMES:
            self.assertIn(name, result)

    def testCopy(self):
        if IGNORE_TEST:
            return
        newLogger = self.logger.copy()
        self.assertTrue(self.logger.equals(newLogger))
Example #3
0
class ModelFitterCore(rpickle.RPickler):

    # Subclasses used in interface
    class OptimizerMethod:
        """
        Pairs an optimizer method with its keyword arguments.

        Attributes
        ----------
        method: value given to the constructor as method
        kwargs: value given to the constructor as kwargs
        """

        def __init__(self, method, kwargs):
            self.kwargs = kwargs
            self.method = method

    def __init__(
        self,
        modelSpecification,
        observedData,
        parametersToFit=None,
        selectedColumns=None,
        fitterMethods=None,
        numFitRepeat=1,
        bootstrapMethods=None,
        parameterLowerBound=PARAMETER_LOWER_BOUND,
        parameterUpperBound=PARAMETER_UPPER_BOUND,
        parameterDct=None,
        fittedDataTransformDct=None,
        logger=Logger(),
        isPlot=True,
        _loggerPrefix="",
        # The following must be kept in sync with ModelFitterBootstrap.bootstrap
        numIteration: int = 10,
        reportInterval: int = 1000,
        maxProcess: int = None,
        serializePath: str = None,
    ):
        """
        Constructs estimates of parameter values.

        If modelSpecification is None, no attribute is set; this is the
        path used by rpConstruct.

        Parameters
        ----------
        modelSpecification: ExtendedRoadRunner/str
            roadrunner model or antimony model
        observedData: NamedTimeseries/str/None
            str: path to CSV file
            None: no observed data; observedTS, fittedTS and the
                  flattened observed array are then None
        parametersToFit: list-str/None
            parameters in the model that you want to fit
            if None, the keys of parameterDct are used
        selectedColumns: list-str
            species names you wish use to fit the model
            default: all columns in observedData
        parameterLowerBound: float
            lower bound for the fitting parameters
        parameterUpperBound: float
            upper bound for the fitting parameters
        parameterDct: dict
            key: parameter name
            value: ParameterSpecification or a triple.
                   Triples are converted by _updateParameterDct.
                   NOTE(review): _updateParameterDct reads a triple as
                   (lower, upper, value), whereas the usage example
                   below is annotated (low, initial, high). Confirm
                   which ordering is intended.
        fittedDataTransformDct: dict
            key: column in selectedColumns
            value: function of the data in selectedColumns;
                   input: NamedTimeseries
                   output: array for the values of the column
        logger: Logger
        isPlot: bool
            passed to the TimeseriesPlotter that is created
        fitterMethods: str/list-str/list-OptimizerMethod
            method used for minimization in fitModel
        numFitRepeat: int
            number of times fitting is repeated for a method
        bootstrapMethods: str/list-str/list-OptimizerMethod
            method used for minimization in bootstrap
        numIteration: number of bootstrap iterations
        reportInterval: number of iterations between progress reports
        maxProcess: Maximum number of processes to use. Default: numCPU
        serializePath: Where to serialize the fitter after bootstrap

        Raises
        ------
        ValueError: fittedDataTransformDct has a key that is not in
            selectedColumns

        Usage
        -----
        parameterDct = {
            "k1": (1, 5, 10),  # name of parameter: low value, initial, high
            "k2": (2, 3, 6)}
        fitter = ModelFitter(roadrunnerModel, "observed.csv",
            parameterDct=parameterDct)
        fitter.fitModel()  # Do the fit
        fitter.bootstrap()  # Estimate parameter variance with bootstrap
        """
        if modelSpecification is None:
            # Default constructor: leave the instance without attributes.
            return
        self._loggerPrefix = _loggerPrefix
        self.modelSpecification = modelSpecification
        self.parametersToFit = parametersToFit
        self.lowerBound = parameterLowerBound
        self.upperBound = parameterUpperBound
        # Saved so that they are available when bootstrapping is requested.
        self.bootstrapKwargs = dict(
            numIteration=numIteration,
            reportInterval=reportInterval,
            maxProcess=maxProcess,
            serializePath=serializePath,
        )
        self.parameterDct = ModelFitterCore._updateParameterDct(
            parameterDct)
        self._numFitRepeat = numFitRepeat
        if self.parametersToFit is None:
            self.parametersToFit = list(self.parameterDct.keys())
        self.observedTS = observedData
        if self.observedTS is not None:
            self.observedTS = mkNamedTimeseries(observedData)
        #
        self.fittedDataTransformDct = fittedDataTransformDct
        #
        if (selectedColumns is None) and (self.observedTS is not None):
            selectedColumns = self.observedTS.colnames
        self.selectedColumns = selectedColumns
        if self.observedTS is not None:
            self._observedArr = self.observedTS[
                self.selectedColumns].flatten()
        else:
            self._observedArr = None
        # Other internal state. _makeMethods always returns a list.
        self._fitterMethods = self._makeMethods(fitterMethods,
                                                METHOD_FITTER_DEFAULTS)
        self._bootstrapMethods = self._makeMethods(
            bootstrapMethods, METHOD_BOOTSTRAP_DEFAULTS)
        self._isPlot = isPlot
        self._plotter = tp.TimeseriesPlotter(isPlot=self._isPlot)
        self._plotFittedTS = None  # Timeseries that is plotted
        self.logger = logger
        # The following are calculated during fitting
        self.roadrunnerModel = None
        self.minimizer = None  # lmfit.minimizer
        self.minimizerResult = None  # Results of minimization
        self.params = None  # params property in lmfit.minimizer
        # Guarded like the other uses of observedTS so that construction
        # without observed data does not fail here.
        if self.observedTS is None:
            self.fittedTS = None
        else:
            self.fittedTS = self.observedTS.copy(
                isInitialize=True)  # Initialize
        self.residualsTS = None  # Residuals for selectedColumns
        self.bootstrapResult = None  # Result from bootstrapping
        # Validation checks
        self._validateFittedDataTransformDct()
        self._bestParameters = _BestParameters(rssq=None, params=None)

    def _makeMethods(self, methods, default):
        """
        Creates a method dictionary.

        Parameters
        ----------
        methods: str/list-str/dict
            method used for minimization in fitModel
            dict: key-method, value-optional parameters

        Returns
        -------
        list-OptimizerMethod
            key: method name
            value: dict of optional parameters
        """
        if methods is None:
            methods = default
        if isinstance(methods, str):
            if methods == METHOD_BOTH:
                methods = METHOD_FITTER_DEFAULTS
            else:
                methods = [methods]
        if isinstance(methods, list):
            if isinstance(methods[0], str):
                results = [
                    ModelFitterCore.OptimizerMethod(method=m, kwargs={})
                    for m in methods
                ]
            else:
                results = methods
        else:
            raise RuntimeError("Must be a list")
        trues = [
            isinstance(m, ModelFitterCore.OptimizerMethod) for m in results
        ]
        if not all(trues):
            raise ValueError("Invalid methods: %s" % str(methods))
        return results

    @classmethod
    def mkParameters(
            cls,
            parameterDct: dict = None,
            parametersToFit: list = None,
            logger: Logger = Logger(),
            lowerBound: float = PARAMETER_LOWER_BOUND,
            upperBound: float = PARAMETER_UPPER_BOUND) -> lmfit.Parameters:
        """
        Constructs lmfit parameters based on specifications.

        A parameter with an entry in parameterDct takes its value and
        bounds from that specification; a missing bound is derived from
        the value using LOWER_PARAMETER_MULT or UPPER_PARAMETER_MULT.
        Any other parameter is bounded by lowerBound and upperBound and
        starts at their mean.

        Parameters
        ----------
        parameterDct: key=name, value=ParameterSpecification
        parametersToFit: list of parameters to fit
            default: keys of parameterDct
        logger: error logger; a new Logger is created if None
        lowerBound: lower value of range for parameters
        upperBound: upper value of range for parameters

        Returns
        -------
        lmfit.Parameters
            A parameter whose addition raises an exception is logged
            and left out.

        Raises
        ------
        RuntimeError: both parameterDct and parametersToFit are None
        """
        def get(value, base_value, multiplier):
            # Use the explicit value if there is one; otherwise scale the base.
            if value is not None:
                return value
            return base_value * multiplier

        #
        if (parametersToFit is None) and (parameterDct is None):
            raise RuntimeError("Must specify one of these parameters.")
        if parameterDct is None:
            parameterDct = {}
        if parametersToFit is None:
            parametersToFit = parameterDct.keys()
        if logger is None:
            # Construct the class; calling the None argument would fail.
            logger = Logger()
        params = lmfit.Parameters()
        for parameterName in parametersToFit:
            if parameterName in parameterDct.keys():
                specification = parameterDct[parameterName]
                value = get(specification.value, specification.value, 1.0)
                # NOTE(review): the previous code tested the sign of value
                # here, but both branches chose the same multipliers, so
                # they are applied unconditionally. If the multipliers
                # should be swapped for negative values, that has never
                # been done; confirm the intent.
                lower = get(specification.lower, specification.value,
                            LOWER_PARAMETER_MULT)
                upper = get(specification.upper, specification.value,
                            UPPER_PARAMETER_MULT)
                if np.isclose(lower - upper, 0):
                    # Keep the two bounds from coinciding.
                    upper = 0.0001
                try:
                    params.add(parameterName,
                               value=value,
                               min=lower,
                               max=upper)
                except Exception as err:
                    msg = "modelFitterCore/mkParameters parameterName %s" \
                          % parameterName
                    logger.error(msg, err)
            else:
                value = np.mean([lowerBound, upperBound])
                params.add(parameterName,
                           value=value,
                           min=lowerBound,
                           max=upperBound)
        return params

    @classmethod
    def initializeRoadrunnerModel(cls, modelSpecification):
        """
        Provides the roadrunner model for a model specification.

        Parameters
        ----------
        modelSpecification: ExtendedRoadRunner/str
            ExtendedRoadRunner: returned as is
            str: loaded with te.loada

        Returns
        -------
        ExtendedRoadRunner

        Raises
        ------
        ValueError: modelSpecification is neither an ExtendedRoadRunner
            nor a str
        """
        if isinstance(modelSpecification,
                      te.roadrunner.extended_roadrunner.ExtendedRoadRunner):
            return modelSpecification
        if isinstance(modelSpecification, str):
            return te.loada(modelSpecification)
        msg = 'Invalid model.'
        msg = msg + "\nA model must either be a Roadrunner model "
        msg = msg + "or an Antimony model."
        raise ValueError(msg)

    @classmethod
    def setupModel(cls, roadrunner, parameters, logger=Logger()):
        """
        Sets up the model for use based on the parameter parameters

        Parameters
        ----------
        roadrunner: ExtendedRoadRunner
        parameters: lmfit.Parameters
        logger Logger
        """
        pp = parameters.valuesdict()
        for parameter in pp.keys():
            try:
                roadrunner.model[parameter] = pp[parameter]
            except Exception as err:
                msg = "_modelFitterCore.setupModel: Could not set value for %s"  \
                      % parameter
                logger.error(msg, err)

    @classmethod
    def runSimulation(
            cls,
            parameters=None,
            roadrunner=None,
            startTime=0,
            endTime=5,
            numPoint=30,
            selectedColumns=None,
            returnDataFrame=True,
            _logger=Logger(),
            _loggerPrefix="",
    ):
        """
        Runs a simulation. Defaults to parameter values in the simulation.

        Parameters
       ----------
        roadrunner: ExtendedRoadRunner/str
            Roadrunner model
        parameters: lmfit.Parameters
            lmfit parameters
        startTime: float
            start time for the simulation
        endTime: float
            end time for the simulation
        numPoint: int
            number of points in the simulation
        selectedColumns: list-str
            output columns in simulation
        returnDataFrame: bool
            return a DataFrame
        _logger: Logger
        _loggerPrefix: str


        Return
        ------
        NamedTimeseries (or None if fail to converge)
        """
        if isinstance(roadrunner, str):
            roadrunner = cls.initializeRoadrunnerModel(roadrunner)
        else:
            roadrunner.reset()
        if parameters is not None:
            # Parameters have been specified
            cls.setupModel(roadrunner, parameters, logger=_logger)
        # Do the simulation
        if selectedColumns is not None:
            newSelectedColumns = list(selectedColumns)
            if TIME not in newSelectedColumns:
                newSelectedColumns.insert(0, TIME)
            try:
                data = roadrunner.simulate(startTime, endTime, numPoint,
                                           newSelectedColumns)
            except Exception as err:
                _logger.error("Roadrunner exception: ", err)
                data = None
        else:
            try:
                data = roadrunner.simulate(startTime, endTime, numPoint)
            except Exception as err:
                _logger.exception("Roadrunner exception: %s", err)
                data = None
        if data is None:
            return data
        fittedTS = NamedTimeseries(namedArray=data)
        if returnDataFrame:
            result = fittedTS.to_dataframe()
        else:
            result = fittedTS
        return result

    @classmethod
    def rpConstruct(cls):
        """
        Overrides rpickler.rpConstruct to create a method that
        constructs an instance without arguments.

        Returns
        -------
        Instance of cls
        """
        return cls(None, None, None)

    def rpRevise(self):
        """
        Overrides rpickler.
        """
        if "logger" not in self.__dict__.keys():
            self.logger = Logger()

    def _validateFittedDataTransformDct(self):
        if self.fittedDataTransformDct is not None:
            keySet = set(self.fittedDataTransformDct.keys())
            selectedColumnsSet = self.selectedColumns
            if (keySet is not None) and (selectedColumnsSet is not None):
                excess = set(keySet).difference(selectedColumnsSet)
                if len(excess) > 0:
                    msg = "Columns not in selectedColumns: %s" % str(excess)
                    raise ValueError(msg)

    def _transformFittedTS(self, data):
        """
        Builds a timeseries from simulation data and applies the
        transformations in fittedDataTransformDct.

        Parameters
        ----------
        data: np.ndarray
            columns are taken to be TIME followed by selectedColumns

        Results
        ----------
        NamedTimeseries
        """
        colnames = [TIME, *self.selectedColumns]
        fittedTS = NamedTimeseries(array=data[:, :], colnames=colnames)
        transformDct = self.fittedDataTransformDct
        if transformDct is None:
            return fittedTS
        for column, func in transformDct.items():
            if func is None:
                # No transformation for this column.
                continue
            fittedTS[column] = func(fittedTS)
        return fittedTS

    @staticmethod
    def _updateParameterDct(parameterDct):
        """
        Handles values that are tuples instead of ParameterSpecification.
        """
        if parameterDct is None:
            parameterDct = {}
        dct = dict(parameterDct)
        for name, value in parameterDct.items():
            if isinstance(value, tuple):
                dct[name] = ParameterSpecification(lower=value[0],
                                                   upper=value[1],
                                                   value=value[2])
        return dct

    @staticmethod
    def addParameter(parameterDct: dict, name: str, lower: float, upper: float,
                     value: float):
        """
        Adds a parameter to a dictionary of parameters.

        The dictionary is updated in place and is also returned, as the
        documented contract of this method states.

        Parameters
        ----------
        parameterDct: parameter dictionary to augment
        name: parameter name
        lower: lower range of parameter value
        upper: upper range of parameter value
        value: initial value

        Returns
        -------
        dict
            parameterDct, with name mapped to a ParameterSpecification
        """
        parameterDct[name] = ParameterSpecification(lower=lower,
                                                    upper=upper,
                                                    value=value)
        return parameterDct

    def _adjustNames(self, antimonyModel:str, observedTS:NamedTimeseries)  \
          ->typing.Tuple[NamedTimeseries, list]:
        """
        Antimony exports can change the names of floating species
        by adding a "_" at the end. Check for this and adjust
        the names in observedTS.

        An observed column is considered missing if "[name]" is not a
        column of a simulation of the antimony model. Each missing
        column is renamed to "name_" in a copy of observedTS. It is
        also replaced in the selected columns, but only if it was
        selected.

        Parameters
        ----------
        antimonyModel: antimony model that is loaded and simulated
        observedTS: observed data whose column names are checked

        Return
        ------
        NamedTimeseries: newObservedTS
            observedTS itself if no name is missing
        list: newSelectedColumns
        """
        rr = te.loada(antimonyModel)
        dataNames = rr.simulate().colnames
        names = ["[%s]" % n for n in observedTS.colnames]
        # Strip the surrounding brackets to recover the column names.
        missingNames = [n[1:-1] for n in set(names).difference(dataNames)]
        newSelectedColumns = list(self.selectedColumns)
        if len(missingNames) > 0:
            newObservedTS = observedTS.copy()
            self.logger.exception("Missing names in antimony export: %s" %
                                  str(missingNames))
            for name in observedTS.colnames:
                if name not in missingNames:
                    continue
                missingName = "%s_" % name
                newObservedTS = newObservedTS.rename(name, missingName)
                # A missing column need not be a selected one; removing
                # a name that is absent from the list would raise.
                if name in newSelectedColumns:
                    newSelectedColumns.remove(name)
                    newSelectedColumns.append(missingName)
        else:
            newObservedTS = observedTS
        return newObservedTS, newSelectedColumns

    def copy(self, isKeepLogger=False):
        """
        Creates a copy of the model fitter.
        Preserves the user-specified settings and the results
        of bootstrapping.
        """
        if not isinstance(self.modelSpecification, str):
            try:
                modelSpecification = self.modelSpecification.getAntimony()
            except Exception as err:
                self.logger.error(
                    "Problem wth conversion to Antimony. Details:", err)
                raise ValueError("Cannot proceed.")
            observedTS, selectedColumns = self._adjustNames(
                modelSpecification, self.observedTS)
        else:
            modelSpecification = self.modelSpecification
            observedTS = self.observedTS.copy()
            selectedColumns = self.selectedColumns
        #
        if isKeepLogger:
            logger = self.logger
        elif self.logger is not None:
            logger = self.logger.copy()
        else:
            logger = None
        newModelFitter = self.__class__(
            copy.deepcopy(modelSpecification),
            observedTS,
            copy.deepcopy(self.parametersToFit),
            selectedColumns=selectedColumns,
            fitterMethods=self._fitterMethods,
            bootstrapMethods=self._bootstrapMethods,
            parameterLowerBound=self.lowerBound,
            parameterUpperBound=self.upperBound,
            parameterDct=copy.deepcopy(self.parameterDct),
            fittedDataTransformDct=copy.deepcopy(self.fittedDataTransformDct),
            logger=logger,
            isPlot=self._isPlot)
        if self.bootstrapResult is not None:
            newModelFitter.bootstrapResult = self.bootstrapResult.copy()
            newModelFitter.params = newModelFitter.bootstrapResult.params
        else:
            newModelFitter.bootstrapResult = None
            newModelFitter.params = self.params
        return newModelFitter

    def initializeRoadRunnerModel(self):
        """
        Sets self.roadrunnerModel from self.modelSpecification.
        """
        specification = self.modelSpecification
        self.roadrunnerModel = ModelFitterCore.initializeRoadrunnerModel(
            specification)

    def getDefaultParameterValues(self):
        """
        Obtain the original values of parameters.

        Returns
        -------
        dict:
            key: parameter name
            value: value of parameter
        """
        dct = {}
        self.initializeRoadRunnerModel()
        self.roadrunnerModel.reset()
        for parameterName in self.parametersToFit:
            dct[parameterName] = self.roadrunnerModel.model[parameterName]
        return dct

    def simulate(self,
                 params=None,
                 startTime=None,
                 endTime=None,
                 numPoint=None):
        """
        Runs a simulation. Defaults to parameter values in the simulation.

        Parameters
       ----------
        params: lmfit.Parameters
        startTime: float
        endTime: float
        numPoint: int

        Return
        ------
        NamedTimeseries
        """
        def setValue(default, parameter):
            # Sets to default if parameter unspecified
            if parameter is None:
                return default
            return parameter

        #
        startTime = setValue(self.observedTS.start, startTime)
        endTime = setValue(self.observedTS.end, endTime)
        numPoint = setValue(len(self.observedTS), numPoint)
        #
        if self.roadrunnerModel is None:
            self.initializeRoadRunnerModel()
        #
        return ModelFitterCore.runSimulation(
            parameters=params,
            roadrunner=self.roadrunnerModel,
            startTime=startTime,
            endTime=endTime,
            numPoint=numPoint,
            selectedColumns=self.selectedColumns,
            _logger=self.logger,
            _loggerPrefix=self._loggerPrefix,
            returnDataFrame=False)

    def updateFittedAndResiduals(self, **kwargs) -> np.ndarray:
        """
        Updates values of self.fittedTS and self.residualsTS
        based on self.params.

        Parameters
        ----------
        kwargs: dict
            arguments for simulation

        Instance Variables Updated
        --------------------------
        self.fittedTS
        self.residualsTS

        Returns
        -------
        1-d ndarray of residuals
        """
        self.fittedTS = self.simulate(**kwargs)  # Updates self.fittedTS
        residualsArr = self._residuals(self.params)
        numRow = len(self.fittedTS)
        numCol = len(residualsArr) // numRow
        residualsArr = np.reshape(residualsArr, (numRow, numCol))
        cols = self.selectedColumns
        if self.residualsTS is None:
            self.residualsTS = self.observedTS.subsetColumns(cols)
        self.residualsTS[cols] = residualsArr

    def _residuals(self, params) -> np.ndarray:
        """
        Compute the residuals between objective and experimental data
        Handle nan values in observedTS. This internal-only method
        is implemented to maximize efficiency.

        The simulation is run at the observed time points. If it fails,
        every residual is LARGE_RESIDUAL. The parameters with the
        smallest residual sum of squares seen so far are kept in
        self._bestParameters.

        Parameters
        ----------
        params: lmfit.Parameters
            parameter values used for the simulation

        Returns
        -------
        1-d ndarray of residuals
        """
        data = ModelFitterCore.runSimulation(
            parameters=params,
            roadrunner=self.roadrunnerModel,
            startTime=self.observedTS.start,
            endTime=self.observedTS.end,
            numPoint=len(self.observedTS),
            selectedColumns=self.selectedColumns,
            _logger=self.logger,
            _loggerPrefix=self._loggerPrefix,
            returnDataFrame=False)
        if data is None:
            residualsArr = np.repeat(LARGE_RESIDUAL, len(self._observedArr))
        else:
            residualsArr = self._observedArr - data.flatten()
            # nan residuals (e.g., from nan observations) become 0.
            residualsArr = np.nan_to_num(residualsArr)
        # Vectorized sum; this method runs on every optimizer evaluation.
        rssq = np.sum(residualsArr**2)
        if (self._bestParameters.rssq is None)  \
              or (rssq < self._bestParameters.rssq):
            self._bestParameters = _BestParameters(params=params.copy(),
                                                   rssq=rssq)
        return residualsArr

    def fitModel(self, params: lmfit.Parameters = None, max_nfev=100):
        """
        Fits the model by adjusting values of parameters based on
        differences between simulated and provided values of
        floating species.

        Each fitter method is run numFitRepeat times. The parameters
        with the smallest residual sum of squares over all runs become
        self.params; self.fittedTS and self.residualsTS are then
        updated to match.

        Parameters
        ----------
        params: starting values of parameters
            default: result of mkParams
        max_nfev: int
            value of "max_nfev" given to the minimizer for a method
            whose kwargs do not specify it

        Example
        -------
        f.fitModel()
        """
        ParameterDescriptor = collections.namedtuple(
            "ParameterDescriptor",
            "params method rssq kwargs minimizer minimizerResult")
        MAX_NFEV = "max_nfev"
        block = Logger.join(self._loggerPrefix, "fitModel")
        guid = self.logger.startBlock(block)
        self.initializeRoadRunnerModel()
        self.params = None
        if self.parametersToFit is not None:
            if params is None:
                params = self.mkParams()
            # Fit the model to the data using one or more methods.
            # Choose the result with the lowest residual sum of squares
            paramResults = []
            lastExcp = None
            for idx, optimizerMethod in enumerate(self._fitterMethods):
                method = optimizerMethod.method
                # Work on a copy: writing the default into the method's own
                # kwargs would make it stick, so that a later call with a
                # different max_nfev would be ignored.
                kwargs = dict(optimizerMethod.kwargs)
                kwargs.setdefault(MAX_NFEV, max_nfev)
                for _ in range(self._numFitRepeat):
                    # _residuals records the best parameters of this run.
                    self._bestParameters = _BestParameters(params=None,
                                                           rssq=None)
                    minimizer = lmfit.Minimizer(self._residuals, params)
                    try:
                        minimizerResult = minimizer.minimize(method=method,
                                                             **kwargs)
                    except Exception as excp:
                        lastExcp = excp
                        msg = "Error minimizing for method: %s" % method
                        self.logger.error(msg, excp)
                        continue
                    # The next run starts from the best parameters found.
                    params = self._bestParameters.params.copy()
                    rssq = np.sum(self._residuals(params)**2)
                    # Skip a result that does not improve on the entry at
                    # this method's index, if there is one.
                    if len(paramResults) > idx:
                        if rssq >= paramResults[idx].rssq:
                            continue
                    parameterDescriptor = ParameterDescriptor(
                        params=params,
                        method=method,
                        rssq=rssq,
                        kwargs=dict(kwargs),
                        minimizer=minimizer,
                        minimizerResult=minimizerResult,
                    )
                    paramResults.append(parameterDescriptor)
            if len(paramResults) == 0:
                msg = "*** Minimizer failed for this model and data."
                self.logger.error(msg, lastExcp)
            else:
                # Select the result that has the smallest residuals
                sortedMethods = sorted(paramResults, key=lambda r: r.rssq)
                bestMethod = sortedMethods[0]
                self.params = bestMethod.params
                self.minimizer = bestMethod.minimizer
                self.minimizerResult = bestMethod.minimizerResult
        # Ensure that residualsTS and fittedTS match the parameters
        self.updateFittedAndResiduals(params=self.params)
        self.logger.endBlock(guid)

    def getFittedModel(self):
        """
        Provides the roadrunner model with fitted parameters

        Returns
        -------
        ExtendedRoadrunner
        """
        self._checkFit()
        self.roadrunnerModel.reset()
        self._setupModel(self.params)
        return self.roadrunnerModel

    def _setupModel(self, parameters):
        """
        Sets up the model for use based on the parameter parameters

        Parameters
        ----------
        parameters: lmfit.Parameters

        """
        ModelFitterCore.setupModel(self.roadrunnerModel,
                                   parameters,
                                   logger=self.logger)

    def mkParams(self, parameterDct: dict = None) -> lmfit.Parameters:
        """
        Constructs lmfit parameters using this fitter's parameters to
        fit, logger and bounds.

        Parameters
        ----------
        parameterDct: key=name, value=ParameterSpecification
            default: self.parameterDct

        Returns
        -------
        lmfit.Parameters
        """
        specificationDct = (self.parameterDct
                            if parameterDct is None else parameterDct)
        return ModelFitterCore.mkParameters(
            parameterDct=specificationDct,
            parametersToFit=self.parametersToFit,
            logger=self.logger,
            lowerBound=self.lowerBound,
            upperBound=self.upperBound)

    def _checkFit(self):
        if self.params is None:
            raise ValueError("Must use fitModel before using this method.")

    def serialize(self, path):
        """
        Writes a copy of this fitter to a file using rpickle.

        Parameters
        ----------
        path: str
            File path
        """
        fitterCopy = self.copy()
        with open(path, "wb") as fd:
            rpickle.dump(fitterCopy, fd)

    @classmethod
    def deserialize(cls, path):
        """
        Reads a fitter from a file using rpickle and initializes
        its roadrunner model.

        Parameters
        ----------
        path: str
            File path

        Return
        ------
        ModelFitter
            Model is initialized.
        """
        with open(path, "rb") as fd:
            restored = rpickle.load(fd)
        restored.initializeRoadRunnerModel()
        return restored