def test_param_from_pandas(self):
    """Regression test for issue #68: Params initialized from pandas data.

    A Param built from a plain dict, from ``Series.to_dict()`` and from
    the Series itself must hold the same values, and each of them must
    be usable inside an indexed constraint rule.
    """
    values = {0: 400.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 240.0}

    model = ConcreteModel()
    model.I = Set(initialize=range(6))

    # Same data, three different containers
    model.P0 = Param(model.I, initialize=dict(values))
    model.P1 = Param(model.I, initialize=pd.Series(values).to_dict())
    model.P2 = Param(model.I, initialize=pd.Series(values))

    reference = list(model.P0.values())
    self.assertEqual(reference, list(model.P1.values()))
    self.assertEqual(reference, list(model.P2.values()))

    model.V = Var(model.I, initialize=0)

    def bounded_below_by(param):
        # Build the rule ``-param[i] <= V[i]`` for the given Param
        def rule(m, i):
            return -param[i] <= m.V[i]
        return rule

    model.Constraint0 = Constraint(model.I, rule=bounded_below_by(model.P0))
    model.Constraint1 = Constraint(model.I, rule=bounded_below_by(model.P1))
    model.Constraint2 = Constraint(model.I, rule=bounded_below_by(model.P2))
def test_bootstrap(self):
    """Bootstrap estimation, confidence region test and pairwise plots.

    Runs ``theta_est_bootstrap`` with 10 bootstrap samples, checks the
    number of rows and the per-row sample count, then feeds the result
    to ``confidence_region_test`` and to ``graphics.pairwise_plot``.
    """
    objval, thetavals = self.pest.theta_est()

    num_bootstraps = 10
    theta_est = self.pest.theta_est_bootstrap(num_bootstraps,
                                              return_samples=True)

    num_samples = theta_est['samples'].apply(len)
    # assertEqual, not assertTrue: the second positional argument of
    # assertTrue is the failure message, so the original check passed
    # for any non-empty result.
    self.assertEqual(len(theta_est.index), num_bootstraps)
    self.assertTrue(num_samples.equals(pd.Series([6] * 10)))

    del theta_est['samples']

    # apply confidence region test
    CR = self.pest.confidence_region_test(theta_est, 'MVN',
                                          [0.5, 0.75, 1.0])

    self.assertTrue(set(CR.columns) >= {0.5, 0.75, 1.0})
    self.assertEqual(CR[0.5].sum(), 5)
    self.assertEqual(CR[0.75].sum(), 7)
    self.assertEqual(CR[1.0].sum(), 10)  # all true

    graphics.pairwise_plot(theta_est)
    graphics.pairwise_plot(theta_est, thetavals)
    graphics.pairwise_plot(theta_est, thetavals, 0.8,
                           ['MVN', 'KDE', 'Rect'])
def likelihood_ratio_test(self, obj_at_theta, obj_value, alphas,
                          return_thresholds=False):
    r"""
    Likelihood ratio test: flag the theta values that fall inside a
    confidence region defined through the :math:`\chi^2` distribution

    Parameters
    ----------
    obj_at_theta: pd.DataFrame, columns = theta_names + 'obj'
        Objective value for each theta value (as returned by
        objective_at_theta)
    obj_value: int or float
        Objective value from the parameter estimation that used all data
    alphas: list
        Alpha values to use in the chi2 test
    return_thresholds: bool, optional
        Also return the threshold computed for each alpha

    Returns
    -------
    LR: pd.DataFrame
        Copy of obj_at_theta with one extra Boolean column per alpha
        (True where 'obj' is strictly below that alpha's threshold)
    thresholds: pd.Series
        Threshold per alpha; only returned if return_thresholds = True
    """
    assert isinstance(obj_at_theta, pd.DataFrame)
    assert isinstance(obj_value, (int, float))
    assert isinstance(alphas, list)
    assert isinstance(return_thresholds, bool)

    results = obj_at_theta.copy()
    num_scenarios = len(self.callback_data)

    cutoffs = {}
    for level in alphas:
        quantile = scipy.stats.chi2.ppf(level, 2)
        cutoffs[level] = obj_value * ((quantile / (num_scenarios - 2)) + 1)
        results[level] = results['obj'] < cutoffs[level]

    cutoffs = pd.Series(cutoffs)

    if not return_thresholds:
        return results
    return results, cutoffs
def test_param_from_pandas_series_index(self):
    """Params read a Series as a mapping; Sets read it as a list."""
    series = pd.Series([1, 3, 5], index=['T1', 'T2', 'T3'])
    by_label = {'T1': 1, 'T2': 3, 'T3': 5}

    model = ConcreteModel()

    # Param: the Series index supplies the keys, so it must match the
    # Param's index set
    model.I = Set(initialize=series.index)
    model.p1 = Param(model.I, initialize=series)
    self.assertEqual(model.p1.extract_values(), by_label)

    model.p2 = Param(series.index, initialize=series)
    self.assertEqual(model.p2.extract_values(), by_label)

    expected_error = "Index 'T1' is not valid for indexed component 'p3'"
    with self.assertRaisesRegex(KeyError, expected_error):
        model.p3 = Param([0, 1, 2], initialize=series)

    # Set: only the Series values are used
    model.J = Set(initialize=series)
    self.assertEqual(set(model.J), {1, 3, 5})
# NOTE(review): ``distributions=[]`` is a mutable default argument. It is
# only read in this function (isinstance/set/len/enumerate), never mutated.
def pairwise_plot(theta_values, theta_star=None, alpha=None, distributions=[],
                  axis_limits=None, title=None, add_obj_contour=True,
                  add_legend=True, filename=None):
    """
    Plot pairwise relationship for theta values, and optionally alpha-level
    confidence intervals and objective value contours

    The figure is a seaborn ``PairGrid``: histograms on the diagonal and,
    on the off-diagonal panels, a scatter of the theta values with optional
    objective contours, theta* marker and confidence regions layered on
    top. The figure is shown with ``plt.show()`` or, if ``filename`` is
    given, saved and closed. Nothing is returned.

    Parameters
    ----------
    theta_values: DataFrame or tuple

        * If theta_values is a DataFrame, then it contains one column for
          each theta variable and (optionally) an objective value column
          ('obj') and columns that contains Boolean results from
          confidence interval tests (labeled using the alpha value). Each
          row is a sample.

          * Theta variables can be computed from ``theta_est_bootstrap``,
            ``theta_est_leaveNout``, and ``leaveNout_bootstrap_test``.
          * The objective value can be computed using the
            ``likelihood_ratio_test``.
          * Results from confidence interval tests can be computed using
            the ``leaveNout_bootstrap_test``, ``likelihood_ratio_test``,
            and ``confidence_region_test``.

        * If theta_values is a tuple, then it contains a mean, covariance,
          and number of samples (mean, cov, n) where mean is a dictionary
          or Series (indexed by variable name), covariance is a DataFrame
          (indexed by variable name, one column per variable name), and n
          is an integer. The mean and covariance are used to create a
          multivariate normal sample of n theta values (drawn with a fixed
          ``random_state=1``). The covariance can be computed using
          ``theta_est(calc_cov=True)``.

    theta_star: dict or Series, optional
        Estimated value of theta. The dictionary or Series is indexed by
        variable name. Theta_star is used to slice higher dimensional
        contour intervals in 2D. A DataFrame is also accepted, in which
        case the row with index label 0 is used.
    alpha: float, optional
        Confidence interval value, if an alpha value is given and the
        distributions list is empty, the data will be filtered by
        True/False values using the column name whose value equals alpha
        (see results from ``leaveNout_bootstrap_test``,
        ``likelihood_ratio_test``, and ``confidence_region_test``)
    distributions: list of strings, optional
        Statistical distribution used to define a confidence region,
        options = 'MVN' for multivariate_normal, 'KDE' for gaussian_kde,
        and 'Rect' for rectangular. Confidence interval is a 2D slice,
        using linear interpolation at theta_star. Confidence regions are
        only drawn when alpha is also given.
    axis_limits: dict, optional
        Axis limits in the format {variable: [min, max]}
    title: string, optional
        Plot title
    add_obj_contour: bool, optional
        Add a contour plot using the column 'obj' in theta_values.
        Contour plot is a 2D slice, using linear interpolation at
        theta_star.
    add_legend: bool, optional
        Add a legend to the plot
    filename: string, optional
        Filename used to save the figure
    """
    assert isinstance(theta_values, (pd.DataFrame, tuple))
    assert isinstance(theta_star, (type(None), dict, pd.Series, pd.DataFrame))
    assert isinstance(alpha, (type(None), int, float))
    assert isinstance(distributions, list)
    assert set(distributions).issubset(set(['MVN', 'KDE', 'Rect']))
    assert isinstance(axis_limits, (type(None), dict))
    assert isinstance(title, (type(None), str))
    assert isinstance(add_obj_contour, bool)
    assert isinstance(filename, (type(None), str))

    # If theta_values is a tuple containing (mean, cov, n), create a DataFrame of values
    if isinstance(theta_values, tuple):
        assert(len(theta_values) == 3)
        mean = theta_values[0]
        cov = theta_values[1]
        n = theta_values[2]
        if isinstance(mean, dict):
            mean = pd.Series(mean)
        theta_names = mean.index
        mvn_dist = stats.multivariate_normal(mean, cov)
        # Fixed random_state so repeated calls draw the same sample
        theta_values = pd.DataFrame(mvn_dist.rvs(n, random_state=1), columns=theta_names)

    assert(theta_values.shape[0] > 0)

    # Normalize theta_star to a Series (or leave it as None)
    if isinstance(theta_star, dict):
        theta_star = pd.Series(theta_star)
    if isinstance(theta_star, pd.DataFrame):
        theta_star = theta_star.loc[0,:]

    # Theta columns are everything except 'obj' and columns whose label is
    # a float or int (the alpha-labeled confidence test result columns)
    theta_names = [col for col in theta_values.columns if (col not in ['obj'])
                   and (not isinstance(col, float)) and (not isinstance(col, int))]

    # Filter data by alpha
    if (alpha in theta_values.columns) and (len(distributions) == 0):
        thetas = theta_values.loc[theta_values[alpha] == True, theta_names]
    else:
        thetas = theta_values[theta_names]

    if theta_star is not None:
        # Keep only (and order by) the plotted theta columns
        theta_star = theta_star[theta_names]

    legend_elements = []

    g = sns.PairGrid(thetas)

    # Plot histogram on the diagonal
    # Note: distplot is deprecated and will be removed in a future
    #       version of seaborn, use histplot. distplot is kept for older
    #       versions of python.
    if check_min_version(sns, "0.11"):
        g.map_diag(sns.histplot)
    else:
        g.map_diag(sns.distplot, kde=False, hist=True, norm_hist=False)

    # Plot filled contours using all theta values based on obj
    # (uses the unfiltered theta_values, not the alpha-filtered thetas)
    if 'obj' in theta_values.columns and add_obj_contour:
        g.map_offdiag(_add_obj_contour, columns=theta_names, data=theta_values,
                      theta_star=theta_star)

    # Plot thetas
    g.map_offdiag(plt.scatter, s=10)
    legend_elements.append(matplotlib.lines.Line2D(
        [0], [0], marker='o', color='w', label='thetas',
        markerfacecolor='cadetblue', markersize=5))

    # Plot theta*
    if theta_star is not None:
        g.map_offdiag(_add_scatter, color='k', columns=theta_names, theta_star=theta_star)

        legend_elements.append(matplotlib.lines.Line2D(
            [0], [0], marker='o', color='w', label='theta*',
            markerfacecolor='k', markersize=6))

    # Plot confidence regions
    # NOTE(review): colors has three entries and is indexed by position in
    # ``distributions``; the assert above only checks membership, so a list
    # with repeated names longer than three would raise IndexError here.
    colors = ['r', 'mediumblue', 'darkgray']
    if (alpha is not None) and (len(distributions) > 0):

        if theta_star is None:
            print("""theta_star is not defined, confidence region slice will be plotted at the mean value of theta""")
            theta_star = thetas.mean()

        mvn_dist = None
        kde_dist = None
        for i, dist in enumerate(distributions):
            if dist == 'Rect':
                lb, ub = fit_rect_dist(thetas, alpha)
                g.map_offdiag(_add_rectangle_CI, color=colors[i],
                              columns=theta_names, lower_bound=lb, upper_bound=ub)
                legend_elements.append(matplotlib.lines.Line2D(
                    [0], [0], color=colors[i], lw=1, label=dist))

            elif dist == 'MVN':
                mvn_dist = fit_mvn_dist(thetas)
                Z = mvn_dist.pdf(thetas)
                # Density value at the (1-alpha) percentile of the sample
                # densities; handed to the helper through its ``alpha``
                # keyword
                score = stats.scoreatpercentile(Z, (1-alpha)*100)
                g.map_offdiag(_add_scipy_dist_CI, color=colors[i],
                              columns=theta_names, ncells=100, alpha=score, dist=mvn_dist,
                              theta_star=theta_star)
                legend_elements.append(matplotlib.lines.Line2D(
                    [0], [0], color=colors[i], lw=1, label=dist))

            elif dist == 'KDE':
                kde_dist = fit_kde_dist(thetas)
                # The KDE is evaluated on the transposed frame
                Z = kde_dist.pdf(thetas.transpose())
                score = stats.scoreatpercentile(Z, (1-alpha)*100)
                g.map_offdiag(_add_scipy_dist_CI, color=colors[i],
                              columns=theta_names, ncells=100, alpha=score, dist=kde_dist,
                              theta_star=theta_star)
                legend_elements.append(matplotlib.lines.Line2D(
                    [0], [0], color=colors[i], lw=1, label=dist))

    _set_axis_limits(g, axis_limits, thetas, theta_star)

    for ax in g.axes.flatten():
        ax.ticklabel_format(style='sci', scilimits=(-2,2), axis='both')

        if add_legend:
            # The legend is attached to a single panel only
            # (presumably the bottom-left one -- confirm against
            # _get_variables)
            xvar, yvar, loc = _get_variables(ax, theta_names)
            if loc == (len(theta_names)-1,0):
                ax.legend(handles=legend_elements, loc='best', prop={'size': 8})
    if title:
        g.fig.subplots_adjust(top=0.9)
        g.fig.suptitle(title)

    # Work in progress
    # Plot lower triangle graphics in separate figures, useful for presentations
    # (disabled: the flag is hard-coded to False, so this branch never runs)
    lower_triangle_only = False
    if lower_triangle_only:
        for ax in g.axes.flatten():
            xvar, yvar, (xloc, yloc) = _get_variables(ax, theta_names)
            if xloc < yloc: # lower triangle
                ax.remove()

                ax.set_xlabel(xvar)
                ax.set_ylabel(yvar)

                fig = plt.figure()
                ax.figure=fig
                fig.axes.append(ax)
                fig.add_axes(ax)

                f, dummy = plt.subplots()
                bbox = dummy.get_position()
                ax.set_position(bbox)
                dummy.remove()
                plt.close(f)

                ax.tick_params(reset=True)

                if add_legend:
                    ax.legend(handles=legend_elements, loc='best', prop={'size': 8})

        plt.close(g.fig)

    # Show interactively, or save to file and close the current figure
    if filename is None:
        plt.show()
    else:
        plt.savefig(filename)
        plt.close()
def confidence_region_test(self, theta_values, distribution, alphas,
                           test_theta_values=None):
    """
    Test whether theta values lie inside a confidence region built from
    a rectangular, multivariate normal, or Gaussian kernel density
    distribution, for each alpha in a list

    Parameters
    ----------
    theta_values: DataFrame, columns = theta_names
        Theta values used to build the confidence region (generally
        returned by theta_est_bootstrap)
    distribution: string
        Distribution that defines the region: 'MVN' for
        multivariate_normal, 'KDE' for gaussian_kde, 'Rect' for
        rectangular.
    alphas: list
        Alpha values for which membership in the region is evaluated.
    test_theta_values: dictionary or DataFrame, keys/columns = theta_names, optional
        Additional theta values checked against the region fitted to
        theta_values. A dictionary is treated as a single point.

    Returns
    -------
    training_results: DataFrame
        Copy of theta_values with one True (inside) / False (outside)
        column per alpha
    test_results: DataFrame
        Only returned if test_theta_values is not None: the test theta
        values with one True (inside) / False (outside) column per alpha
    """
    assert isinstance(theta_values, pd.DataFrame)
    assert distribution in ['Rect', 'MVN', 'KDE']
    assert isinstance(alphas, list)
    assert isinstance(test_theta_values, (type(None), dict, pd.DataFrame))

    if isinstance(test_theta_values, dict):
        # A dict is one test point: turn it into a one-row DataFrame
        as_series = pd.Series(test_theta_values)
        test_theta_values = as_series.to_frame().transpose()

    has_test = test_theta_values is not None

    training_results = theta_values.copy()
    if has_test:
        test_result = test_theta_values.copy()

    for level in alphas:

        if distribution == 'Rect':
            lower, upper = fit_rect_dist(theta_values, level)
            above = (theta_values > lower).all(axis=1)
            below = (theta_values < upper).all(axis=1)
            training_results[level] = (above & below)

            if has_test:
                # bounds come from the training set
                above = (test_theta_values > lower).all(axis=1)
                below = (test_theta_values < upper).all(axis=1)
                test_result[level] = (above & below)

        elif distribution == 'MVN':
            fitted = fit_mvn_dist(theta_values)
            density = fitted.pdf(theta_values)
            cutoff = scipy.stats.scoreatpercentile(density, (1-level)*100)
            training_results[level] = (density >= cutoff)

            if has_test:
                # cutoff comes from the training set
                density = fitted.pdf(test_theta_values)
                test_result[level] = (density >= cutoff)

        elif distribution == 'KDE':
            fitted = fit_kde_dist(theta_values)
            density = fitted.pdf(theta_values.transpose())
            cutoff = scipy.stats.scoreatpercentile(density, (1-level)*100)
            training_results[level] = (density >= cutoff)

            if has_test:
                # cutoff comes from the training set
                density = fitted.pdf(test_theta_values.transpose())
                test_result[level] = (density >= cutoff)

    if has_test:
        return training_results, test_result
    return training_results
def _Q_opt(self, ThetaVals=None, solver="ef_ipopt", return_values=None,
           bootlist=None, calc_cov=False, cov_n=None):
    """
    Set up all thetas as first stage Vars, return resulting theta
    values as well as the objective function value.

    Parameters
    ----------
    ThetaVals: optional
        Forwarded to the scenario creator as ``cb_data["ThetaVals"]``
        when it is not None.
    solver: string, optional
        Must be "ef_ipopt". "k_aug" and any other value raise
        RuntimeError before any model is built.
    return_values: list, optional
        Names of components whose values are collected from every block
        found directly on the extensive form. None (the default) and an
        empty list both mean "collect nothing".
    bootlist: list, optional
        Bootstrap scenario list. When given, one scenario is created per
        entry and the list is forwarded as ``cb_data["BootList"]``;
        otherwise one scenario is created per entry of
        ``self.callback_data``.
    calc_cov: bool, optional
        If True, the solve goes through ``inv_reduced_hessian_barrier``
        and a covariance matrix is returned as well.
    cov_n: int, optional
        Number of data points used to scale the covariance. It is used
        in ``n - l`` arithmetic, so it must be a number when calc_cov is
        True.

    Returns
    -------
    objval: float
        Value of ``ef.EF_Obj``
    thetavals: pd.Series
        Values of the nonanticipative (first stage) variables, indexed
        by variable name with the scenario prefix stripped
    var_values: pd.DataFrame
        Only if return_values is non-empty: one row per block that has
        at least one of the requested components
    cov: pd.DataFrame
        Only if calc_cov is True (always the last element)

    Raises
    ------
    RuntimeError
        If solver is "k_aug" or is not "ef_ipopt".
    """
    if solver == "k_aug":
        raise RuntimeError("k_aug no longer supported.")
    if solver != "ef_ipopt":
        # Reject unknown solvers up front, before the extensive form is
        # built. ``format`` keeps the message identical for strings and
        # also copes with non-string values.
        raise RuntimeError("Unknown solver in Q_Opt={}".format(solver))

    if return_values is None:
        return_values = []

    # (Bootstrap scenarios will use indirection through the bootlist)
    if bootlist is None:
        num_scenarios = len(self.callback_data)
    else:
        num_scenarios = len(bootlist)
    scen_names = ["Scenario{}".format(i) for i in range(num_scenarios)]

    # Everything the scenario creator needs travels in this dict
    outer_cb_data = dict()
    outer_cb_data["callback"] = self._instance_creation_callback
    if ThetaVals is not None:
        outer_cb_data["ThetaVals"] = ThetaVals
    if bootlist is not None:
        outer_cb_data["BootList"] = bootlist
    outer_cb_data["cb_data"] = self.callback_data  # None is OK
    outer_cb_data["theta_names"] = self.theta_names

    scenario_creator_options = {"cb_data": outer_cb_data}
    # Same call either way; only the module providing create_EF differs
    create_EF = sputils.create_EF if use_mpisppy else local_ef.create_EF
    ef = create_EF(scen_names,
                   _experiment_instance_creation_callback,
                   EF_name="_Q_opt",
                   suppress_warnings=True,
                   scenario_creator_kwargs=scenario_creator_options)
    self.ef_instance = ef

    # Solve the extensive form with ipopt
    if not calc_cov:
        # Do not calculate the reduced hessian
        ipopt = SolverFactory('ipopt')
        if self.solver_options is not None:
            for key in self.solver_options:
                ipopt.options[key] = self.solver_options[key]

        solve_result = ipopt.solve(ef, tee=self.tee)
    else:
        # No explicit ASL availability check here: the import error will
        # be raised when inv_reduced_hessian_barrier is used below.

        # parmest makes the fitted parameters stage 1 variables
        ind_vars = [nonant for ndname, nonant, solval in ef_nonants(ef)]
        # calculate the reduced hessian
        solve_result, inv_red_hes = \
            inverse_reduced_hessian.inv_reduced_hessian_barrier(
                self.ef_instance,
                independent_variables=ind_vars,
                solver_options=self.solver_options,
                tee=self.tee)

    if self.diagnostic_mode:
        print(' Solver termination condition = ',
              str(solve_result.solver.termination_condition))

    # assume all first stage are thetas...
    thetavals = {}
    for ndname, nonant, solval in ef_nonants(ef):
        # process the name
        # the scenarios are blocks, so strip the scenario name
        vname = nonant.name[nonant.name.find(".") + 1:]
        thetavals[vname] = solval

    objval = pyo.value(ef.EF_Obj)

    if calc_cov:
        # Calculate the covariance matrix

        # Number of data points considered
        num_data = cov_n

        # Number of fitted parameters
        num_params = len(thetavals)

        # Assumption: Objective value is sum of squared errors
        sse = objval

        # Calculate covariance assuming experimental observation errors
        # are independent and follow a Gaussian distribution with
        # constant variance.
        #
        # The formula used in parmest was verified against equations
        # (7-5-15) and (7-5-16) in "Nonlinear Parameter Estimation",
        # Y. Bard, 1974.
        #
        # This formula is also applicable if the objective is scaled by
        # a constant; the constant cancels out. (was scaled by 1/n
        # because it computes an expected value.)
        cov = 2 * sse / (num_data - num_params) * inv_red_hes
        cov = pd.DataFrame(cov, index=thetavals.keys(),
                           columns=thetavals.keys())

    thetavals = pd.Series(thetavals)

    if len(return_values) > 0:
        var_values = []
        for exp_i in self.ef_instance.component_objects(
                Block, descend_into=False):
            vals = {}
            for var in return_values:
                exp_i_var = exp_i.find_component(str(var))
                if exp_i_var is None:
                    # we might have a block such as _mpisppy_data
                    continue
                temp = [pyo.value(_) for _ in exp_i_var.values()]
                # A single value is stored unwrapped, several as a list
                vals[var] = temp[0] if len(temp) == 1 else temp
            if len(vals) > 0:
                var_values.append(vals)
        var_values = pd.DataFrame(var_values)
        if calc_cov:
            return objval, thetavals, var_values, cov
        return objval, thetavals, var_values

    if calc_cov:
        return objval, thetavals, cov
    return objval, thetavals