Ejemplo n.º 1
0
    def test_param_from_pandas(self):
        """Regression test for issue #68 (Param initialized from pandas data).

        The same six values are supplied to three Params as a plain dict, as
        a ``pd.Series`` converted with ``to_dict()``, and as a raw
        ``pd.Series``.  All three Params must hold the same values and must be
        usable inside an indexed Constraint rule.
        """
        values = {0: 400.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 240.0}

        model = ConcreteModel()
        model.I = Set(initialize=range(6))

        # Each Param receives its own container so none of them share state
        model.P0 = Param(model.I, initialize=dict(values))
        model.P1 = Param(model.I, initialize=pd.Series(values).to_dict())
        model.P2 = Param(model.I, initialize=pd.Series(values))

        self.assertEqual(list(model.P0.values()), list(model.P1.values()))
        self.assertEqual(list(model.P0.values()), list(model.P2.values()))

        model.V = Var(model.I, initialize=0)

        def lower_bound_rule(param_name):
            # Build the rule ``-P[l] <= V[l]`` for the Param with this name
            def rule(m, l):
                return -getattr(m, param_name)[l] <= m.V[l]
            return rule

        # Building each constraint exercises Param lookup inside an expression
        model.Constraint0 = Constraint(model.I, rule=lower_bound_rule('P0'))
        model.Constraint1 = Constraint(model.I, rule=lower_bound_rule('P1'))
        model.Constraint2 = Constraint(model.I, rule=lower_bound_rule('P2'))
Ejemplo n.º 2
0
    def test_bootstrap(self):
        """Exercise bootstrap estimation, the confidence region test and plotting.

        Runs ``theta_est_bootstrap`` with 10 bootstrap samples, checks the
        shape of the returned samples, applies ``confidence_region_test`` with
        the 'MVN' distribution, and finally calls ``graphics.pairwise_plot``
        with several argument combinations to make sure it runs.
        """
        objval, thetavals = self.pest.theta_est()

        num_bootstraps = 10
        theta_est = self.pest.theta_est_bootstrap(num_bootstraps,
                                                  return_samples=True)

        num_samples = theta_est['samples'].apply(len)
        # assertEqual is required here: assertTrue(x, 10) would treat 10 as
        # the failure message and pass for any non-empty result.
        self.assertEqual(len(theta_est.index), num_bootstraps)
        self.assertTrue(
            num_samples.equals(pd.Series([6] * num_bootstraps)))

        # The samples column is not a theta value; drop it before testing
        del theta_est['samples']

        # apply confidence region test
        CR = self.pest.confidence_region_test(theta_est, 'MVN',
                                              [0.5, 0.75, 1.0])

        self.assertTrue(set(CR.columns) >= {0.5, 0.75, 1.0})
        self.assertEqual(CR[0.5].sum(), 5)
        self.assertEqual(CR[0.75].sum(), 7)
        self.assertEqual(CR[1.0].sum(), 10)  # all true

        graphics.pairwise_plot(theta_est)
        graphics.pairwise_plot(theta_est, thetavals)
        graphics.pairwise_plot(theta_est, thetavals, 0.8,
                               ['MVN', 'KDE', 'Rect'])
Ejemplo n.º 3
0
    def likelihood_ratio_test(self,
                              obj_at_theta,
                              obj_value,
                              alphas,
                              return_thresholds=False):
        r"""
        Flag theta values that fall inside a confidence region according to
        a likelihood ratio test based on the :math:`\chi^2` distribution

        For every alpha, a threshold is computed as
        ``obj_value * (chi2.ppf(alpha, 2) / (S - 2) + 1)``, where ``S`` is
        ``len(self.callback_data)``.  A row passes when its 'obj' value is
        strictly below that threshold.

        Parameters
        ----------
        obj_at_theta: pd.DataFrame, columns = theta_names + 'obj'
            Objective values for each theta value (returned by
            objective_at_theta)
        obj_value: int or float
            Objective value from parameter estimation using all data
        alphas: list
            List of alpha values to use in the chi2 test
        return_thresholds: bool, optional
            Return the threshold value for each alpha

        Returns
        -------
        LR: pd.DataFrame
            Copy of obj_at_theta with one additional True/False column per
            alpha (the column label is the alpha value)
        thresholds: pd.Series
            Only returned if return_thresholds is True; threshold values
            indexed by alpha.
        """
        assert isinstance(obj_at_theta, pd.DataFrame)
        assert isinstance(obj_value, (int, float))
        assert isinstance(alphas, list)
        assert isinstance(return_thresholds, bool)

        # Work on a copy so the caller's DataFrame is left untouched
        results = obj_at_theta.copy()
        n_scenarios = len(self.callback_data)

        cutoffs = {}
        for level in alphas:
            quantile = scipy.stats.chi2.ppf(level, 2)
            cutoff = obj_value * ((quantile / (n_scenarios - 2)) + 1)
            cutoffs[level] = cutoff
            results[level] = results['obj'] < cutoff

        cutoffs = pd.Series(cutoffs)

        if not return_thresholds:
            return results
        return results, cutoffs
Ejemplo n.º 4
0
    def test_param_from_pandas_series_index(self):
        """Check how Param and Set interpret a ``pd.Series`` initializer.

        A Param reads the Series as a mapping from index label to value,
        while a Set reads it as a plain sequence of values.
        """
        series = pd.Series([1, 3, 5], index=['T1', 'T2', 'T3'])
        by_label = {'T1': 1, 'T2': 3, 'T3': 5}
        m = ConcreteModel()

        # Params treat Series as maps (so the Series index matters)
        m.I = Set(initialize=series.index)
        m.p1 = Param(m.I, initialize=series)
        self.assertEqual(m.p1.extract_values(), by_label)

        m.p2 = Param(series.index, initialize=series)
        self.assertEqual(m.p2.extract_values(), by_label)

        # An index set that does not match the Series labels must be rejected
        expected_error = "Index 'T1' is not valid for indexed component 'p3'"
        with self.assertRaisesRegex(KeyError, expected_error):
            m.p3 = Param([0, 1, 2], initialize=series)

        # Sets treat Series as lists
        m.J = Set(initialize=series)
        self.assertEqual(set(m.J), {1, 3, 5})
Ejemplo n.º 5
0
def pairwise_plot(theta_values, theta_star=None, alpha=None, distributions=[], 
                  axis_limits=None, title=None, add_obj_contour=True, 
                  add_legend=True, filename=None):
    """
    Plot pairwise relationship for theta values, and optionally alpha-level 
    confidence intervals and objective value contours
    
    Parameters
    ----------
    theta_values: DataFrame or tuple
    
        * If theta_values is a DataFrame, then it contains one column for each theta variable 
          and (optionally) an objective value column ('obj') and columns that contains 
          Boolean results from confidence interval tests (labeled using the alpha value). 
          Each row is a sample.
          
          * Theta variables can be computed from ``theta_est_bootstrap``, 
            ``theta_est_leaveNout``, and  ``leaveNout_bootstrap_test``.
          * The objective value can be computed using the ``likelihood_ratio_test``.
          * Results from confidence interval tests can be computed using the  
            ``leaveNout_bootstrap_test``, ``likelihood_ratio_test``, and 
            ``confidence_region_test``.

        * If theta_values is a tuple, then it contains a mean, covariance, and number 
          of samples (mean, cov, n) where mean is a dictionary or Series 
          (indexed by variable name), covariance is a DataFrame (indexed by 
          variable name, one column per variable name), and n is an integer.
          The mean and covariance are used to create a multivariate normal 
          sample of n theta values. The covariance can be computed using 
          ``theta_est(calc_cov=True)``.
        
    theta_star: dict, Series or DataFrame, optional
        Estimated value of theta.  The dictionary or Series is indexed by variable name.  
        If a DataFrame is given, the row labeled 0 is used.
        Theta_star is used to slice higher dimensional contour intervals in 2D
    alpha: float, optional
        Confidence interval value, if an alpha value is given and the 
        distributions list is empty, the data will be filtered by True/False 
        values using the column name whose value equals alpha (see results from
        ``leaveNout_bootstrap_test``, ``likelihood_ratio_test``, and 
        ``confidence_region_test``)
    distributions: list of strings, optional
        Statistical distribution used to define a confidence region, 
        options = 'MVN' for multivariate_normal, 'KDE' for gaussian_kde, and 
        'Rect' for rectangular.
        Confidence interval is a 2D slice, using linear interpolation at theta_star.
        Confidence regions are only drawn when alpha is also given.
    axis_limits: dict, optional
        Axis limits in the format {variable: [min, max]}
    title: string, optional
        Plot title
    add_obj_contour: bool, optional
        Add a contour plot using the column 'obj' in theta_values.
        Contour plot is a 2D slice, using linear interpolation at theta_star.
    add_legend: bool, optional
        Add a legend to the plot
    filename: string, optional
        Filename used to save the figure.  If None, the figure is displayed 
        with ``plt.show()`` instead of being saved.

    Returns
    -------
    None
        The function only produces a figure (shown or saved to filename).
    """
    # NOTE(review): add_legend is the only argument that is not type-checked
    # here, and the mutable default ``distributions=[]`` is never mutated below.
    assert isinstance(theta_values, (pd.DataFrame, tuple))
    assert isinstance(theta_star, (type(None), dict, pd.Series, pd.DataFrame))
    assert isinstance(alpha, (type(None), int, float))
    assert isinstance(distributions, list)
    assert set(distributions).issubset(set(['MVN', 'KDE', 'Rect']))
    assert isinstance(axis_limits, (type(None), dict))
    assert isinstance(title, (type(None), str))
    assert isinstance(add_obj_contour, bool)
    assert isinstance(filename, (type(None), str))
    
    # If theta_values is a tuple containing (mean, cov, n), create a DataFrame of values
    if isinstance(theta_values, tuple):
        assert(len(theta_values) == 3)
        mean = theta_values[0]
        cov = theta_values[1]
        n = theta_values[2]
        if isinstance(mean, dict):
            mean = pd.Series(mean)
        theta_names = mean.index
        mvn_dist = stats.multivariate_normal(mean, cov)
        # Fixed random_state so the generated sample is the same on every call
        theta_values = pd.DataFrame(mvn_dist.rvs(n, random_state=1), columns=theta_names)
            
    assert(theta_values.shape[0] > 0)
    
    # Normalize theta_star to a Series (a DataFrame contributes its row labeled 0)
    if isinstance(theta_star, dict):
        theta_star = pd.Series(theta_star)
    if isinstance(theta_star, pd.DataFrame):
        theta_star = theta_star.loc[0,:]
    
    # Theta columns are everything except 'obj' and columns with a numeric
    # label (numeric labels hold the True/False results for an alpha value)
    theta_names = [col for col in theta_values.columns if (col not in ['obj']) 
                        and (not isinstance(col, float)) and (not isinstance(col, int))]
    
    # Filter data by alpha
    if (alpha in theta_values.columns) and (len(distributions) == 0):
        thetas = theta_values.loc[theta_values[alpha] == True, theta_names]
    else:
        thetas = theta_values[theta_names]
    
    # Keep only the theta entries (and their column order) in theta_star
    if theta_star is not None:
        theta_star = theta_star[theta_names]
    
    # Legend handles are collected as layers are added and attached at the end
    legend_elements = []
    
    g = sns.PairGrid(thetas)
    
    # Plot histogram on the diagonal
    # Note: distplot is deprecated and will be removed in a future
    #       version of seaborn, use histplot.  distplot is kept for older
    #       versions of python.
    if check_min_version(sns, "0.11"):
        g.map_diag(sns.histplot)
    else:
        g.map_diag(sns.distplot, kde=False, hist=True, norm_hist=False) 
    
    # Plot filled contours using all theta values based on obj
    if 'obj' in theta_values.columns and add_obj_contour:
        g.map_offdiag(_add_obj_contour, columns=theta_names, data=theta_values, 
                      theta_star=theta_star)
        
    # Plot thetas
    g.map_offdiag(plt.scatter, s=10)
    legend_elements.append(matplotlib.lines.Line2D(
        [0], [0], marker='o', color='w', label='thetas',
        markerfacecolor='cadetblue', markersize=5))
    
    # Plot theta*
    if theta_star is not None:
        g.map_offdiag(_add_scatter, color='k', columns=theta_names, theta_star=theta_star)
        
        legend_elements.append(matplotlib.lines.Line2D(
            [0], [0], marker='o', color='w', label='theta*',
            markerfacecolor='k', markersize=6))
    
    # Plot confidence regions
    # One color per entry in distributions, assigned by position in the list.
    # NOTE(review): only three colors exist, so a distributions list with
    # repeated names and more than three entries would raise IndexError.
    colors = ['r', 'mediumblue', 'darkgray']
    if (alpha is not None) and (len(distributions) > 0):
        
        # Without theta_star the 2D slice is taken at the sample mean instead
        if theta_star is None:
            print("""theta_star is not defined, confidence region slice will be 
                  plotted at the mean value of theta""")
            theta_star = thetas.mean()
        
        mvn_dist = None
        kde_dist = None
        for i, dist in enumerate(distributions):
            if dist == 'Rect':
                lb, ub = fit_rect_dist(thetas, alpha)
                g.map_offdiag(_add_rectangle_CI, color=colors[i], columns=theta_names, 
                            lower_bound=lb, upper_bound=ub)
                legend_elements.append(matplotlib.lines.Line2D(
                    [0], [0], color=colors[i], lw=1, label=dist))
                
            elif dist == 'MVN':
                mvn_dist = fit_mvn_dist(thetas)
                Z = mvn_dist.pdf(thetas)
                # Density value at the (1-alpha) percentile of the sample
                # densities; passed to the helper as its ``alpha`` argument
                score = stats.scoreatpercentile(Z, (1-alpha)*100) 
                g.map_offdiag(_add_scipy_dist_CI, color=colors[i], columns=theta_names, 
                            ncells=100, alpha=score, dist=mvn_dist, 
                            theta_star=theta_star)
                legend_elements.append(matplotlib.lines.Line2D(
                    [0], [0], color=colors[i], lw=1, label=dist))
                
            elif dist == 'KDE':
                kde_dist = fit_kde_dist(thetas)
                # Unlike the MVN branch, the KDE pdf is given the transposed data
                Z = kde_dist.pdf(thetas.transpose())
                score = stats.scoreatpercentile(Z, (1-alpha)*100) 
                g.map_offdiag(_add_scipy_dist_CI, color=colors[i], columns=theta_names, 
                            ncells=100, alpha=score, dist=kde_dist, 
                            theta_star=theta_star)
                legend_elements.append(matplotlib.lines.Line2D(
                    [0], [0], color=colors[i], lw=1, label=dist))
            
    _set_axis_limits(g, axis_limits, thetas, theta_star)
    
    for ax in g.axes.flatten():
        ax.ticklabel_format(style='sci', scilimits=(-2,2), axis='both')
        
        if add_legend:
            xvar, yvar, loc = _get_variables(ax, theta_names)
            # Attach the legend to a single subplot: last row, first column
            if loc == (len(theta_names)-1,0):
                ax.legend(handles=legend_elements, loc='best', prop={'size': 8})
    if title:
        # Leave room above the grid for the figure-level title
        g.fig.subplots_adjust(top=0.9)
        g.fig.suptitle(title) 
        
    # Work in progress
    # Plot lower triangle graphics in separate figures, useful for presentations
    # The flag is hard-coded to False, so the block below never runs.
    lower_triangle_only = False
    if lower_triangle_only:
        for ax in g.axes.flatten():
            xvar, yvar, (xloc, yloc) = _get_variables(ax, theta_names)
            if xloc < yloc: # lower triangle
                ax.remove()
                
                ax.set_xlabel(xvar)
                ax.set_ylabel(yvar)
                
                # Re-attach the detached axes to a new figure of its own
                fig = plt.figure()
                ax.figure=fig
                fig.axes.append(ax)
                fig.add_axes(ax)
                
                # Borrow the default position from a throwaway subplot
                f, dummy = plt.subplots()
                bbox = dummy.get_position()
                ax.set_position(bbox) 
                dummy.remove()
                plt.close(f)

                ax.tick_params(reset=True)
                
                if add_legend:
                    ax.legend(handles=legend_elements, loc='best', prop={'size': 8})
                
        plt.close(g.fig)
    
    # Show interactively when no filename is given; otherwise save and close
    if filename is None:
        plt.show()
    else:
        plt.savefig(filename)
        plt.close()
Ejemplo n.º 6
0
 def confidence_region_test(self, theta_values, distribution, alphas,
                            test_theta_values=None):
     """
     Determine, for each alpha, which theta values lie inside a confidence
     region built from a rectangular, multivariate normal, or Gaussian
     kernel density distribution fitted to theta_values

     Parameters
     ----------
     theta_values: DataFrame, columns = theta_names
         Theta values used to generate a confidence region
         (generally returned by theta_est_bootstrap)
     distribution: string
         Statistical distribution used to define a confidence region,
         options = 'MVN' for multivariate_normal, 'KDE' for gaussian_kde,
         and 'Rect' for rectangular.
     alphas: list
         List of alpha values used to determine if theta values are inside
         or outside the region.
     test_theta_values: dictionary or DataFrame, keys/columns = theta_names, optional
         Additional theta values that are compared to the confidence region
         to determine if they are inside or outside.  A dictionary is
         converted to a one-row DataFrame.

     Returns
     -------
     training_results: DataFrame
         Copy of theta_values with one True (inside) / False (outside)
         column per alpha
     test_results: DataFrame
         Only returned if test_theta_values is not None; copy of the test
         theta values with one True (inside) / False (outside) column per
         alpha
     """
     assert isinstance(theta_values, pd.DataFrame)
     assert distribution in ['Rect', 'MVN', 'KDE']
     assert isinstance(alphas, list)
     assert isinstance(test_theta_values, (type(None), dict, pd.DataFrame))

     if isinstance(test_theta_values, dict):
         as_series = pd.Series(test_theta_values)
         test_theta_values = as_series.to_frame().transpose()

     have_test = test_theta_values is not None

     # Results are written to copies so the inputs stay unmodified
     training_results = theta_values.copy()
     if have_test:
         test_result = test_theta_values.copy()

     for level in alphas:

         if distribution == 'Rect':
             lower, upper = fit_rect_dist(theta_values, level)
             above = (theta_values > lower).all(axis=1)
             below = (theta_values < upper).all(axis=1)
             training_results[level] = (above & below)

             if have_test:
                 # use upper and lower bound from the training set
                 test_above = (test_theta_values > lower).all(axis=1)
                 test_below = (test_theta_values < upper).all(axis=1)
                 test_result[level] = (test_above & test_below)
             continue

         # Density-based regions: the two fitted objects differ only in the
         # orientation of the data passed to their pdf
         if distribution == 'MVN':
             fitted = fit_mvn_dist(theta_values)
             orient = lambda frame: frame
         elif distribution == 'KDE':
             fitted = fit_kde_dist(theta_values)
             orient = lambda frame: frame.transpose()
         else:
             # Only reachable when assertions are disabled; nothing to do
             continue

         density = fitted.pdf(orient(theta_values))
         cutoff = scipy.stats.scoreatpercentile(density, (1-level)*100)
         training_results[level] = (density >= cutoff)

         if have_test:
             # use score from the training set
             test_density = fitted.pdf(orient(test_theta_values))
             test_result[level] = (test_density >= cutoff)

     if have_test:
         return training_results, test_result
     return training_results
Ejemplo n.º 7
0
    def _Q_opt(self,
               ThetaVals=None,
               solver="ef_ipopt",
               return_values=[],
               bootlist=None,
               calc_cov=False,
               cov_n=None):
        """
        Set up all thetas as first stage Vars, return resulting theta
        values as well as the objective function value.

        An extensive form (EF) model is built with one scenario per entry in
        ``self.callback_data`` (or per entry in ``bootlist``), stored on
        ``self.ef_instance``, and solved with ipopt.

        Parameters
        ----------
        ThetaVals: optional
            If not None, forwarded to the scenario creator under the key
            "ThetaVals".
        solver: string, optional
            Only "ef_ipopt" is handled.  "k_aug" raises immediately; any
            other value raises after the EF has been built.
        return_values: list, optional
            Names of model components whose values are collected from each
            Block directly under the EF instance.  Not mutated here.
        bootlist: list, optional
            If not None, forwarded to the scenario creator under the key
            "BootList"; the number of scenarios is ``len(bootlist)``.
        calc_cov: bool, optional
            If True, the solve goes through
            ``inv_reduced_hessian_barrier`` and a covariance matrix is
            computed and returned.
        cov_n: int, optional
            Number of data points used in the covariance formula.
            NOTE(review): must be provided when calc_cov is True, otherwise
            ``n - l`` below fails on None.

        Returns
        -------
        objval: float
            Value of ``ef.EF_Obj``
        thetavals: pd.Series
            First stage variable values indexed by variable name (scenario
            prefix stripped)
        var_values: pd.DataFrame
            Only returned if return_values is non-empty; one row per block
            that contained at least one of the requested components
        cov: pd.DataFrame
            Only returned if calc_cov is True

        Raises
        ------
        RuntimeError
            If solver is "k_aug" or is not "ef_ipopt".
        """
        if (solver == "k_aug"):
            raise RuntimeError("k_aug no longer supported.")

        # (Bootstrap scenarios will use indirection through the bootlist)
        if bootlist is None:
            senario_numbers = list(range(len(self.callback_data)))
            scen_names = ["Scenario{}".format(i) for i in senario_numbers]
        else:
            scen_names = ["Scenario{}".format(i) for i in range(len(bootlist))]

        # tree_model.CallbackModule = None
        # Bundle everything the scenario creator callback needs in one dict
        outer_cb_data = dict()
        outer_cb_data["callback"] = self._instance_creation_callback
        if ThetaVals is not None:
            outer_cb_data["ThetaVals"] = ThetaVals
        if bootlist is not None:
            outer_cb_data["BootList"] = bootlist
        outer_cb_data["cb_data"] = self.callback_data  # None is OK
        outer_cb_data["theta_names"] = self.theta_names

        # NOTE(review): ``options`` is assigned but never used in this method
        options = {"solver": "ipopt"}
        scenario_creator_options = {"cb_data": outer_cb_data}
        # Both branches make the same call; only the providing module differs
        if use_mpisppy:
            ef = sputils.create_EF(
                scen_names,
                _experiment_instance_creation_callback,
                EF_name="_Q_opt",
                suppress_warnings=True,
                scenario_creator_kwargs=scenario_creator_options)
        else:
            ef = local_ef.create_EF(
                scen_names,
                _experiment_instance_creation_callback,
                EF_name="_Q_opt",
                suppress_warnings=True,
                scenario_creator_kwargs=scenario_creator_options)
        self.ef_instance = ef

        # Solve the extensive form with ipopt
        if solver == "ef_ipopt":

            if not calc_cov:
                # Do not calculate the reduced hessian

                # From here on ``solver`` is the solver object, not the
                # string argument
                solver = SolverFactory('ipopt')
                if self.solver_options is not None:
                    for key in self.solver_options:
                        solver.options[key] = self.solver_options[key]

                solve_result = solver.solve(ef, tee=self.tee)

            # The import error will be raised when we attempt to use
            # inv_reduced_hessian_barrier below.
            #
            #elif not asl_available:
            #    raise ImportError("parmest requires ASL to calculate the "
            #                      "covariance matrix with solver 'ipopt'")
            else:
                # parmest makes the fitted parameters stage 1 variables
                ind_vars = []
                for ndname, Var, solval in ef_nonants(ef):
                    ind_vars.append(Var)
                # calculate the reduced hessian
                solve_result, inv_red_hes = \
                    inverse_reduced_hessian.inv_reduced_hessian_barrier(
                        self.ef_instance,
                        independent_variables= ind_vars,
                        solver_options=self.solver_options,
                        tee=self.tee)

            if self.diagnostic_mode:
                print('    Solver termination condition = ',
                      str(solve_result.solver.termination_condition))

            # assume all first stage are thetas...
            thetavals = {}
            for ndname, Var, solval in ef_nonants(ef):
                # process the name
                # the scenarios are blocks, so strip the scenario name
                vname = Var.name[Var.name.find(".") + 1:]
                thetavals[vname] = solval

            objval = pyo.value(ef.EF_Obj)

            if calc_cov:
                # Calculate the covariance matrix

                # Number of data points considered
                n = cov_n

                # Extract number of fitted parameters
                l = len(thetavals)

                # Assumption: Objective value is sum of squared errors
                sse = objval
                '''Calculate covariance assuming experimental observation errors are
                independent and follow a Gaussian 
                distribution with constant variance.
                
                The formula used in parmest was verified against equations (7-5-15) and
                (7-5-16) in "Nonlinear Parameter Estimation", Y. Bard, 1974.
                
                This formula is also applicable if the objective is scaled by a constant;
                the constant cancels out. (was scaled by 1/n because it computes an
                expected value.)
                '''
                cov = 2 * sse / (n - l) * inv_red_hes
                # Label rows and columns with the theta names
                cov = pd.DataFrame(cov,
                                   index=thetavals.keys(),
                                   columns=thetavals.keys())

            thetavals = pd.Series(thetavals)

            if len(return_values) > 0:
                var_values = []
                # Only blocks directly under the EF are visited
                for exp_i in self.ef_instance.component_objects(
                        Block, descend_into=False):
                    vals = {}
                    for var in return_values:
                        exp_i_var = exp_i.find_component(str(var))
                        if exp_i_var is None:  # we might have a block such as _mpisppy_data
                            continue
                        temp = [pyo.value(_) for _ in exp_i_var.values()]
                        # A single value is stored bare, several as a list
                        if len(temp) == 1:
                            vals[var] = temp[0]
                        else:
                            vals[var] = temp
                    # Blocks with none of the requested components add no row
                    if len(vals) > 0:
                        var_values.append(vals)
                var_values = pd.DataFrame(var_values)
                if calc_cov:
                    return objval, thetavals, var_values, cov
                else:
                    return objval, thetavals, var_values

            if calc_cov:

                return objval, thetavals, cov
            else:
                return objval, thetavals

        else:
            raise RuntimeError("Unknown solver in Q_Opt=" + solver)