Example #1
    def _get_new_estpar(self, estpar, rel_step, sign):
        """
        Returns a new ``EstPar`` object with a modified value,
        according to ``sign`` and ``rel_step``.

        :param estpar: EstPar
        :param rel_step: float, (0-1)
        :param sign: string, '+' or '-'
        :return: EstPar
        """
        if sign == "+":
            sign_mltp = 1.0
        elif sign == "-":
            sign_mltp = -1.0
        else:
            # Fail early instead of multiplying by None below
            raise ValueError("Unrecognized sign ({})".format(sign))

        new_value = estpar.value * (1 + rel_step * sign_mltp)

        if new_value > estpar.hi:
            new_value = estpar.hi
        if new_value < estpar.lo:
            new_value = estpar.lo

        return EstPar(estpar.name, estpar.lo, estpar.hi, new_value)
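
The same step-and-clamp logic can be sketched as a standalone function on plain floats (a hypothetical helper for illustration only, not part of modest-py), which makes the arithmetic easy to verify:

    def step_and_clamp(value, lo, hi, rel_step, sign):
        """Return value changed by rel_step in the given direction, clamped to [lo, hi]."""
        if sign not in ("+", "-"):
            raise ValueError("Unrecognized sign ({})".format(sign))
        sign_mltp = 1.0 if sign == "+" else -1.0
        return min(max(value * (1 + rel_step * sign_mltp), lo), hi)

    print(step_and_clamp(5.0, 0.0, 10.0, 0.2, "+"))  # 20 % step up -> 6.0
    print(step_and_clamp(5.0, 0.0, 10.0, 0.2, "-"))  # 20 % step down -> 4.0
    print(step_and_clamp(5.0, 0.0, 10.0, 2.0, "+"))  # clamped to the upper bound -> 10.0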
Example #2
File: scipy.py  Project: taoyyt/modest-py
    def __init__(self, fmu_path, inp, known, est, ideal,
                 solver, options={}, ftype='RMSE'):
        """
        :param fmu_path: string, absolute path to the FMU
        :param inp: DataFrame, columns with input timeseries, index in seconds
        :param known: Dictionary, key=parameter_name, value=value
        :param est: Dictionary, key=parameter_name, value=tuple
                    (guess value, lo limit, hi limit), guess can be None
        :param ideal: DataFrame, ideal solution to be compared with model
                      outputs (variable names must match)
        :param solver: str, solver type (e.g. 'TNC', 'L-BFGS-B', 'SLSQP')
        :param options: dict, additional options passed to the SciPy's solver
        :param ftype: str, cost function type. Currently 'NRMSE' (advised
                      for multi-objective estimation) or 'RMSE'.
        """
        self.logger = logging.getLogger(type(self).__name__)

        assert inp.index.equals(ideal.index), \
            'inp and ideal indexes are not matching'

        # Solver type
        self.solver = solver

        # Default solver options
        self.options = {'disp': True, 'iprint': 2, 'maxiter': 500}

        if len(options) > 0:
            for key in options:
                self.options[key] = options[key]

        # Cost function type
        self.ftype = ftype

        # Ideal solution
        self.ideal = ideal

        # Adjust COM_POINTS
        # CVODE solver complains without "-1"
        SCIPY.COM_POINTS = len(self.ideal) - 1

        # Inputs
        self.inputs = inp

        # Known parameters to DataFrame
        known_df = pd.DataFrame()
        for key in known:
            assert known[key] is not None, \
                'None is not allowed in known parameters (parameter {})' \
                .format(key)
            known_df[key] = [known[key]]

        # est: dictionary to a list with EstPar instances
        self.est = list()
        for key in est:
            lo = est[key][1]
            hi = est[key][2]
            if est[key][0] is None:  # If guess is None, assume random guess
                v = lo + random() * (hi - lo)
            else:  # Else, take the guess passed in est
                v = est[key][0]
            self.est.append(EstPar(name=key, value=v, lo=lo, hi=hi))
        est = self.est

        # Model
        output_names = [var for var in ideal]
        self.model = SCIPY._get_model_instance(fmu_path, inp, known_df, est,
                                               output_names)

        # Outputs
        self.summary = pd.DataFrame()
        self.res = pd.DataFrame()
        self.best_err = 1e7

        # Temporary placeholder for the summary.
        # It needs to be stored as a class variable, because it has to be
        # updated from a static method used as a callback.
        self.summary_cols = \
            [x.name for x in self.est] + [SCIPY.ERR, SCIPY.METHOD]
        SCIPY.TMP_SUMMARY = pd.DataFrame(columns=self.summary_cols)

        # Log
        self.logger.info('SCIPY initialized... =========================')
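
A hypothetical construction sketch based only on the constructor parameters documented above; the FMU path, variable names and values are placeholders, not data from the project:

    import pandas as pd

    # Input and ideal series must share the same index (time in seconds)
    inp = pd.DataFrame({"Tout": [270.0, 272.0]}, index=[0, 3600])
    ideal = pd.DataFrame({"T": [293.0, 293.5]}, index=[0, 3600])

    known = {"V": 150.0}           # fixed parameters, None not allowed
    est = {"R": (None, 0.1, 5.0)}  # None guess -> random value within (lo, hi)

    scipy_est = SCIPY(
        "model.fmu", inp, known, est, ideal,
        solver="L-BFGS-B",
        options={"maxiter": 100},  # merged into the default solver options
        ftype="NRMSE",
    )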
Example #3
    def __init__(
        self,
        fmu_path,
        inp,
        known,
        est,
        ideal,
        rel_step=0.01,
        tol=0.0001,
        try_lim=30,
        maxiter=300,
        ftype="RMSE",
    ):
        """
        :param fmu_path: string, absolute path to the FMU
        :param inp: DataFrame, columns with input timeseries, index in seconds
        :param known: Dictionary, key=parameter_name, value=value
        :param est: Dictionary, key=parameter_name, value=tuple
                    (guess value, lo limit, hi limit), guess can be None
        :param ideal: DataFrame, ideal solution to be compared
                      with model outputs (variable names must match)
        :param rel_step: float, initial relative step when modifying parameters
        :param tol: float, stopping criterion, when rel_step
                    becomes smaller than tol algorithm stops
        :param try_lim: integer, maximum number of tries to decrease rel_step
        :param maxiter: integer, maximum number of iterations
        :param string ftype: Cost function type. Currently 'NRMSE' or 'RMSE'
        """
        self.logger = logging.getLogger(type(self).__name__)

        assert inp.index.equals(
            ideal.index), "inp and ideal indexes are not matching"
        assert rel_step > tol, \
            "Relative step must be greater than the stop criterion (tol)"

        # Cost function type
        self.ftype = ftype

        # Ideal solution
        self.ideal = ideal

        # Adjust COM_POINTS
        # CVODE solver complains without "-1"
        PS.COM_POINTS = len(self.ideal) - 1

        # Inputs
        self.inputs = inp

        # Known parameters to DataFrame
        known_df = pd.DataFrame()
        for key in known:
            assert (
                known[key] is not None
            ), "None is not allowed in known parameters " "(parameter {})".format(
                key)
            known_df[key] = [known[key]]

        # est: dictionary to a list with EstPar instances
        self.est = list()
        for key in est:
            lo = est[key][1]
            hi = est[key][2]
            if est[key][0] is None:  # If guess is None, assume random guess
                v = lo + random() * (hi - lo)
            else:  # Else, take the guess passed in est
                v = est[key][0]
            self.est.append(EstPar(name=key, value=v, lo=lo, hi=hi))
        est = self.est

        # Model
        output_names = [var for var in ideal]
        self.model = PS._get_model_instance(fmu_path, inp, known_df, est,
                                            output_names)

        # Initial value for relative parameter step (0-1)
        self.rel_step = rel_step

        # Min. allowed relative parameter change (0-1)
        # PS stops when self.max_change < tol
        self.tol = tol

        # Max. number of iterations without moving to a new point
        self.try_lim = try_lim

        # Max. number of iterations in total
        self.max_iter = maxiter

        # Outputs
        self.summary = pd.DataFrame()
        self.res = pd.DataFrame()

        self.logger.info(
            "Pattern Search initialized... =========================")
Example #4
    def __init__(
        self,
        fmu_path,
        inp,
        known,
        est,
        ideal,
        maxiter=100,
        tol=0.001,
        look_back=10,
        pop_size=40,
        uniformity=0.5,
        mut=0.05,
        mut_inc=0.3,
        trm_size=6,
        ftype="RMSE",
        init_pop=None,
        lhs=False,
    ):
        """
        The population can be initialized in various ways:
        - if `init_pop` is None, one individual is initialized using
          initial guess from `est`
        - if `init_pop` contains less individuals than `pop_size`,
          then the rest is random
        - if `init_pop` == `pop_size` then no random individuals are generated

        :param fmu_path: string, absolute path to the FMU
        :param inp: DataFrame, columns with input timeseries, index in seconds
        :param known: Dictionary, key=parameter_name, value=value
        :param est: Dictionary, key=parameter_name, value=tuple
                    (guess value, lo limit, hi limit), guess can be None
        :param ideal: DataFrame, ideal solution to be compared with model
                      outputs (variable names must match)
        :param maxiter: int, maximum number of generations
        :param tol: float, when the error does not decrease by more than
                    ``tol`` over the last ``look_back`` generations,
                    the simulation stops
        :param look_back: int, number of past generations to track
                          the error decrease (see ``tol``)
        :param pop_size: int, size of the population
        :param uniformity: float (0.-1.), uniformity rate, affects gene
                           exchange in the crossover operation
        :param mut: float (0.-1.), mutation rate, specifies how often genes
                    are to be mutated to a random value,
                    helps to reach the global optimum
        :param mut_inc: float (0.-1.), increased mutation rate, specifies
                        how often genes are to be mutated by a
                        small amount, used when the population diversity
                        is low, helps to reach a local optimum
        :param trm_size: int, size of the tournament
        :param string ftype: Cost function type. Currently 'NRMSE'
                             (advised for multi-objective estimation)
                             or 'RMSE'.
        :param DataFrame init_pop: Initial population. DataFrame with
                                   estimated parameters. If None, takes
                                   initial guess from est.
        :param bool lhs: If True, init_pop and the initial guesses in est are
                         ignored, and the population is chosen using
                         Latin Hypercube Sampling.
        """
        self.logger = logging.getLogger(type(self).__name__)

        deprecated_msg = "This GA implementation is deprecated. Use MODESTGA instead."
        print(deprecated_msg)
        self.logger.warning(deprecated_msg)

        self.logger.info("GA constructor invoked")

        assert inp.index.equals(ideal.index), "inp and ideal indexes are not matching"

        # Evolution parameters
        algorithm.UNIFORM_RATE = uniformity
        algorithm.MUT_RATE = mut
        algorithm.MUT_RATE_INC = mut_inc
        algorithm.TOURNAMENT_SIZE = int(trm_size)

        self.max_generations = maxiter
        self.tol = tol
        self.look_back = look_back

        # History of fittest errors from each generation (list of floats)
        self.fittest_errors = list()

        # History of all estimates and errors from all individuals
        self.all_estim_and_err = pd.DataFrame()

        # Initialize EstPar objects
        estpars = list()
        for key in sorted(est.keys()):
            self.logger.info(
                "Add {} (initial guess={}) to estimated parameters".format(
                    key, est[key][0]
                )
            )
            estpars.append(
                EstPar(name=key, value=est[key][0], lo=est[key][1], hi=est[key][2])
            )

        # Put known into DataFrame
        known_df = pd.DataFrame()
        for key in known:
            assert (
                known[key] is not None
            ), "None is not allowed in known parameters (parameter {})".format(key)
            known_df[key] = [known[key]]
            self.logger.info("Known parameters:\n{}".format(str(known_df)))

        # If LHS initialization, init_pop is disregarded
        if lhs:
            self.logger.info("LHS initialization")
            init_pop = GA._lhs_init(
                par_names=[p.name for p in estpars],
                bounds=[(p.lo, p.hi) for p in estpars],
                samples=pop_size,
                criterion="c",
            )
            self.logger.debug("Current population:\n{}".format(str(init_pop)))
        # Else, if no init_pop provided, generate one individual
        # based on initial guess from `est`
        elif init_pop is None:
            self.logger.info(
                "No initial population provided, one individual will be based "
                "on the initial guess and the others will be random"
            )
            init_pop = pd.DataFrame({k: [est[k][0]] for k in est})
            self.logger.debug("Current population:\n{}".format(str(init_pop)))

        # Take individuals from init_pop and add random individuals
        # until pop_size == len(init_pop)
        # (the number of individuals in init_pop can be lower than
        # the desired pop_size)
        if init_pop is not None:
            missing = pop_size - init_pop.index.size
            self.logger.debug("Missing individuals = {}".format(missing))
            if missing > 0:
                self.logger.debug("Add missing individuals (random)...")
                while missing > 0:
                    # Draw a random value within (lo, hi) for each parameter
                    new_row = pd.DataFrame(
                        {
                            key: [random.random() * (est[key][2] - est[key][1])
                                  + est[key][1]]
                            for key in sorted(est.keys())
                        }
                    )
                    init_pop = pd.concat([init_pop, new_row],
                                         ignore_index=True)
                    missing -= 1
            self.logger.debug("Current population:\n{}".format(str(init_pop)))

        # Initialize population
        self.logger.debug("Instantiate Population ")
        self.pop = Population(
            fmu_path=fmu_path,
            pop_size=pop_size,
            inp=inp,
            known=known_df,
            est=estpars,
            ideal=ideal,
            init=True,
            ftype=ftype,
            init_pop=init_pop,
        )
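
The three population-initialization modes described in the docstring can be illustrated with a short, hypothetical sketch (placeholder data as in the earlier examples, but with an explicit initial guess so that the guess-based individual is well defined):

    import pandas as pd

    est = {"R": (1.0, 0.1, 5.0)}  # explicit guess, lo and hi limits

    # (a) lhs=True: init_pop and the guesses in est are ignored,
    #     the whole population comes from Latin Hypercube Sampling
    ga_lhs = GA("model.fmu", inp, known, est, ideal, pop_size=40, lhs=True)

    # (b) init_pop with fewer rows than pop_size: the remaining
    #     individuals are generated randomly within (lo, hi)
    seed_pop = pd.DataFrame({"R": [0.5, 1.5]})
    ga_seed = GA("model.fmu", inp, known, est, ideal, pop_size=40,
                 init_pop=seed_pop)

    # (c) init_pop is None: one individual is built from the guess in est,
    #     the rest of the population is random
    ga_guess = GA("model.fmu", inp, known, est, ideal, pop_size=40)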
Example #5
    def __init__(
        self,
        fmu_path,
        inp,
        known,
        est,
        ideal,
        options={},
        ftype="RMSE",
        generations=None,
        pop_size=None,
        mut_rate=None,
        trm_size=None,
        tol=None,
        inertia=None,
        workers=None,
    ):
        """
        :param fmu_path: string, absolute path to the FMU
        :param inp: DataFrame, columns with input timeseries, index in seconds
        :param known: Dictionary, key=parameter_name, value=value
        :param est: Dictionary, key=parameter_name, value=tuple
                    (guess value, lo limit, hi limit), guess can be None
        :param ideal: DataFrame, ideal solution to be compared with model
                      outputs (variable names must match)
        :param options: dict, additional options passed to the solver (not used here)
        :param ftype: str, cost function type. Currently 'NRMSE' (advised
                      for multi-objective estimation) or 'RMSE'.
        :param generations: int, max. number of generations
        :param pop_size: int, population size
        :param mut_rate: float, mutation rate
        :param trm_size: int, tournament size
        :param tol: float, absolute solution tolerance
        :param inertia: int, maximum number of non-improving generations
        :param workers: int, number of CPUs to use
        """
        self.logger = logging.getLogger(type(self).__name__)

        assert inp.index.equals(
            ideal.index), "inp and ideal indexes are not matching"

        self.fmu_path = fmu_path
        self.inp = inp
        self.known = known
        self.ideal = ideal
        self.ftype = ftype

        # Default solver options
        self.workers = os.cpu_count()  # CPU cores to use
        self.options = {
            "generations": 50,  # Max. number of generations
            "pop_size": 30,  # Population size
            "mut_rate": 0.01,  # Mutation rate
            "trm_size": 3,  # Tournament size
            "tol": 1e-3,  # Solution tolerance
            "inertia": 100,  # Max. number of non-improving generations
            "xover_ratio": 0.5,  # Crossover ratio
        }

        # User options
        if workers is not None:
            self.workers = workers
        if generations is not None:
            self.options["generations"] = generations
        if pop_size is not None:
            self.options["pop_size"] = pop_size
        if mut_rate is not None:
            self.options["mut_rate"] = mut_rate
        if trm_size is not None:
            self.options["trm_size"] = trm_size
        if tol is not None:
            self.options["tol"] = tol
        if inertia is not None:
            self.options["inertia"] = inertia

        # Adjust trm_size if population size is too small
        if self.options["trm_size"] >= (self.options["pop_size"] //
                                        (self.workers * 2)):
            new_trm_size = self.options["pop_size"] // (self.workers * 4)
            new_pop_size = self.options["pop_size"]
            if new_trm_size < 2:
                new_trm_size = 2
                new_pop_size = new_trm_size * self.workers * 4
            self.logger.warning(
                "Tournament size has to be lower than pop_size // (workers * 2). "
                f"Re-adjusting to trm_size = {new_trm_size}, pop_size = {new_pop_size}"
            )
            self.options["trm_size"] = new_trm_size
            self.options["pop_size"] = new_pop_size

        # Warn the user about a possible mistake in the chosen options
        if self.options["trm_size"] <= 1:
            self.logger.warning(
                "Tournament size equals 1. The possible reasons are:\n"
                "   - too small population size leading to readjusted tournament size\n"
                "   - too many workers (population is divided among workers)\n"
                "   - you chose tournament size equal to 1 by mistake\n"
                "The optimization will proceed, but the performance "
                "might be suboptimal...")

        self.logger.debug(f"MODESTGA options: {self.options}")
        self.logger.debug(f"MODESTGA workers = {self.workers}")

        # Known parameters to DataFrame
        known_df = pd.DataFrame()
        for key in known:
            assert (
                known[key] is not None
            ), "None is not allowed in known parameters (parameter {})".format(
                key)
            known_df[key] = [known[key]]

        # est: dictionary to a list with EstPar instances
        self.est = list()
        for key in est:
            lo = est[key][1]
            hi = est[key][2]
            if est[key][0] is None:  # If guess is None, assume random guess
                v = lo + random() * (hi - lo)
            else:  # Else, take the guess passed in est
                v = est[key][0]
            self.est.append(EstPar(name=key, value=v, lo=lo, hi=hi))
        est = self.est

        # Model
        output_names = [var for var in ideal]

        # Outputs
        self.summary = pd.DataFrame()
        self.res = pd.DataFrame()
        self.best_err = 1e7

        # Temporary placeholder for the summary.
        # It needs to be stored as a class variable, because it has to be
        # updated from a static method used as a callback.
        self.summary_cols = [x.name for x in self.est
                             ] + [MODESTGA.ERR, MODESTGA.METHOD]
        MODESTGA.TMP_SUMMARY = pd.DataFrame(columns=self.summary_cols)

        # Log
        self.logger.info("MODESTGA initialized... =========================")