def objective_at_theta(self, theta_values):
    """
    Evaluate the objective function at a set of theta values.

    Parameters
    ----------
    theta_values: DataFrame, columns=theta_names
        Values of theta used to compute the objective

    Returns
    -------
    obj_at_theta: DataFrame
        Objective values for each theta value (infeasible solutions
        are omitted).
    """
    # Parallel execution requires plain lists/dicts, so convert the
    # DataFrame rows up front and scatter them across the workers.
    names = theta_values.columns
    theta_records = theta_values.to_dict('records')

    task_mgr = mpiu.ParallelTaskManager(len(theta_records))
    my_thetas = task_mgr.global_to_local_data(theta_records)

    # Evaluate the objective at each local theta point, skipping any
    # point where the solver reported infeasibility.
    local_results = []
    for point in my_thetas:
        obj, _, status = self._Q_at_theta(point)
        if status != pyo.TerminationCondition.infeasible:
            local_results.append(list(point.values()) + [obj])
        # DLW, Aug2018: should we also store the worst solver status?

    gathered = task_mgr.allgather_global_data(local_results)
    return pd.DataFrame(data=gathered, columns=list(names) + ['obj'])
def theta_est_leaveNout(self, lNo, lNo_samples=None, seed=None,
                        return_samples=False):
    """
    Parameter estimation where N data points are left out of each sample.

    Parameters
    ----------
    lNo: int
        Number of data points to leave out for parameter estimation
    lNo_samples: int
        Number of leave-N-out samples.  If lNo_samples=None, the
        maximum number of combinations will be used
    seed: int or None, optional
        Random seed
    return_samples: bool, optional
        Return a list of sample numbers that were left out

    Returns
    -------
    lNo_theta: DataFrame
        Theta values for each sample and (if return_samples = True)
        the sample numbers left out of each estimation
    """
    # Basic argument validation, matching the style used elsewhere in
    # this class.
    assert isinstance(lNo, int)
    assert isinstance(lNo_samples, (type(None), int))
    assert isinstance(seed, (type(None), int))
    assert isinstance(return_samples, bool)

    sample_size = len(self._numbers_list) - lNo

    if seed is not None:
        np.random.seed(seed)

    # Build the full list of leave-N-out samples, then scatter it
    # across the parallel workers.
    all_samples = self._get_sample_list(sample_size, lNo_samples,
                                        replacement=False)
    task_mgr = mpiu.ParallelTaskManager(len(all_samples))
    my_samples = task_mgr.global_to_local_data(all_samples)

    # Temporarily shrink numbers_list to the reduced sample size used
    # during each estimation.
    self._numbers_list = list(range(sample_size))

    local_results = []
    for _, sample in my_samples:
        objval, thetavals = self.theta_est(bootlist=list(sample))
        # Record which data points were left out of this estimation.
        left_out = list(set(range(len(self.callback_data))) - set(sample))
        thetavals['lNo'] = np.sort(left_out)
        local_results.append(thetavals)

    # Restore numbers_list to cover the full data set.
    self._numbers_list = list(range(len(self.callback_data)))

    gathered = task_mgr.allgather_global_data(local_results)
    lNo_theta = pd.DataFrame(gathered)

    if not return_samples:
        del lNo_theta['lNo']
    return lNo_theta
def likelihood_ratio(self, search_ranges=None):
    """
    Compute the likelihood ratio and return the entire mesh.

    Parameters
    ----------
    search_ranges: dictionary of lists indexed by theta
        Mesh points (might be optional in the future)

    Returns
    -------
    SSE: DataFrame
        Sum of squared errors values for the entire mesh unless some
        mesh points are infeasible, which are omitted.
    """
    def _mesh_points(ranges):
        # Yield every theta point on the Cartesian-product mesh as a
        # dict keyed by theta name.
        names = ranges.keys()
        for combo in itertools.product(*ranges.values()):
            yield dict(zip(names, combo))

    # Parallel execution requires plain lists/dicts, so materialize the
    # full mesh up front and scatter it across the workers.
    full_mesh = list(_mesh_points(search_ranges))
    task_mgr = mpiu.ParallelTaskManager(len(full_mesh))
    my_mesh = task_mgr.global_to_local_data(full_mesh)

    # Evaluate the squared error at each local mesh point, skipping any
    # point where the solver reported infeasibility.
    local_SSE = []
    for point in my_mesh:
        SSE, _, status = self.Q_at_theta(point)
        if status != pyo.TerminationCondition.infeasible:
            local_SSE.append(list(point.values()) + [SSE])
        # DLW, Aug2018: should we also store the worst solver status?

    gathered = task_mgr.allgather_global_data(local_SSE)
    columns = list(search_ranges.keys()) + ["SSE"]
    return pd.DataFrame(data=gathered, columns=columns)
def bootstrap(self, N):
    """
    Run parameter estimation using N bootstap samples.

    Parameters
    ----------
    N: int
        Number of bootstrap samples to draw

    Returns
    -------
    bootstrap_theta_list: DataFrame
        Samples and theta values from the bootstrap
    """
    num_points = len(self.numbers_list)
    task_mgr = mpiu.ParallelTaskManager(N)

    # Draw the N bootstrap samples up front; a sample is only accepted
    # when it contains more unique data points than there are thetas.
    all_bootlists = []
    for i in range(N):
        attempts = 0
        unique_count = 0
        while unique_count <= len(self.thetalist):
            bootlist = np.random.choice(self.numbers_list, num_points,
                                        replace=True)
            unique_count = len(np.unique(bootlist))
            attempts += 1
            # Arbitrary timeout limit so a hopeless resample loop
            # fails loudly instead of spinning forever.
            if attempts > N:
                raise RuntimeError(
                    "Internal error: timeout in bootstrap"
                    " constructing a sample; possible hint:"
                    " the dim of theta may be too close to N")
        all_bootlists.append((i, bootlist))

    # Scatter the samples and run the estimations locally.
    my_bootlists = task_mgr.global_to_local_data(all_bootlists)

    local_thetas = []
    for idx, bootlist in my_bootlists:
        objval, thetavals = self.theta_est(bootlist=bootlist)
        thetavals['samples'] = bootlist
        local_thetas.append(thetavals)

    gathered = task_mgr.allgather_global_data(local_thetas)
    return pd.DataFrame(gathered)
def theta_est_bootstrap(self, N, samplesize=None, replacement=True,
                        seed=None, return_samples=False):
    """
    Run parameter estimation using N bootstap samples.

    Parameters
    ----------
    N: int
        Number of bootstrap samples to draw from the data
    samplesize: int or None, optional
        Sample size, if None samplesize will be set to the number
        of experiments
    replacement: bool, optional
        Sample with or without replacement
    seed: int or None, optional
        Set the random seed
    return_samples: bool, optional
        Return a list of experiment numbers used in each bootstrap
        estimation

    Returns
    -------
    bootstrap_theta: DataFrame
        Theta values for each bootstrap sample and (if return_samples
        = True) the sample numbers used in each estimation
    """
    if samplesize is None:
        samplesize = len(self._numbers_list)
    if seed is not None:
        np.random.seed(seed)

    task_mgr = mpiu.ParallelTaskManager(N)

    # Draw the N bootstrap samples up front; a sample is only accepted
    # when it contains more unique experiments than there are thetas.
    all_bootlists = []
    for i in range(N):
        attempts = 0
        unique_count = 0
        while unique_count <= len(self.theta_names):
            bootlist = np.random.choice(self._numbers_list, samplesize,
                                        replace=replacement)
            unique_count = len(np.unique(bootlist))
            attempts += 1
            # Arbitrary timeout limit so a hopeless resample loop
            # fails loudly instead of spinning forever.
            if attempts > N:
                raise RuntimeError(
                    "Internal error: timeout in bootstrap"
                    " constructing a sample; possible hint:"
                    " the dim of theta may be too close to N")
        all_bootlists.append((i, bootlist))

    # Scatter the samples and run the estimations locally.
    my_bootlists = task_mgr.global_to_local_data(all_bootlists)

    local_thetas = []
    for idx, bootlist in my_bootlists:
        objval, thetavals = self.theta_est(bootlist=bootlist)
        thetavals['samples'] = bootlist
        local_thetas.append(thetavals)

    gathered = task_mgr.allgather_global_data(local_thetas)
    bootstrap_theta = pd.DataFrame(gathered)

    if not return_samples:
        del bootstrap_theta['samples']
    return bootstrap_theta
def theta_est_bootstrap(self, bootstrap_samples, samplesize=None,
                        replacement=True, seed=None, return_samples=False):
    """
    Parameter estimation using bootstrap resampling of the data.

    Parameters
    ----------
    bootstrap_samples: int
        Number of bootstrap samples to draw from the data
    samplesize: int or None, optional
        Size of each bootstrap sample. If samplesize=None, samplesize
        will be set to the number of samples in the data
    replacement: bool, optional
        Sample with or without replacement
    seed: int or None, optional
        Random seed
    return_samples: bool, optional
        Return a list of sample numbers used in each bootstrap
        estimation

    Returns
    -------
    bootstrap_theta: DataFrame
        Theta values for each sample and (if return_samples = True)
        the sample numbers used in each estimation
    """
    # Basic argument validation, matching the style used elsewhere in
    # this class.
    assert isinstance(bootstrap_samples, int)
    assert isinstance(samplesize, (type(None), int))
    assert isinstance(replacement, bool)
    assert isinstance(seed, (type(None), int))
    assert isinstance(return_samples, bool)

    if samplesize is None:
        samplesize = len(self._numbers_list)
    if seed is not None:
        np.random.seed(seed)

    # Build the full list of bootstrap samples, then scatter it across
    # the parallel workers.
    all_samples = self._get_sample_list(samplesize, bootstrap_samples,
                                        replacement)
    task_mgr = mpiu.ParallelTaskManager(bootstrap_samples)
    my_samples = task_mgr.global_to_local_data(all_samples)

    # Temporarily point numbers_list at the bootstrap sample size used
    # during each estimation.
    self._numbers_list = list(range(samplesize))

    local_results = []
    for _, sample in my_samples:
        objval, thetavals = self.theta_est(bootlist=list(sample))
        thetavals['samples'] = sample
        local_results.append(thetavals)

    # Restore numbers_list to cover the full data set.
    self._numbers_list = list(range(len(self.callback_data)))

    gathered = task_mgr.allgather_global_data(local_results)
    bootstrap_theta = pd.DataFrame(gathered)

    if not return_samples:
        del bootstrap_theta['samples']
    return bootstrap_theta