def _get_sample_list(self, samplesize, num_samples, replacement=True): samplelist = list() if num_samples is None: # This could get very large for i, l in enumerate(combinations(self._numbers_list, samplesize)): samplelist.append((i, np.sort(l))) else: for i in range(num_samples): attempts = 0 unique_samples = 0 # check for duplicates in each sample duplicate = False # check for duplicates between samples while (unique_samples <= len(self.theta_names)) and (not duplicate): sample = np.random.choice(self._numbers_list, samplesize, replace=replacement) sample = np.sort(sample).tolist() unique_samples = len(np.unique(sample)) if sample in samplelist: duplicate = True attempts += 1 if attempts > num_samples: # arbitrary timeout limit raise RuntimeError("""Internal error: timeout constructing a sample, the dim of theta may be too close to the samplesize""") samplelist.append((i, sample)) return samplelist
def calc_cp(alpha, beta, k):
    """Return the sorted roots of a degree-(k+1) polynomial built from
    factorial ratios of ``alpha``, ``beta`` and ``k``.

    These look like collocation points of a Jacobi-type polynomial family
    — TODO confirm against the caller's documentation.

    Parameters
    ----------
    alpha, beta : int
        Non-negative integer parameters of the polynomial family.
    k : int
        Determines the polynomial degree; ``k + 1`` roots are returned.

    Returns
    -------
    list
        The roots of the assembled polynomial, sorted ascending.
    """
    # BUG FIX: the original called ``numpy.math.factorial``.  ``numpy.math``
    # was never public API (an accidental re-export of the stdlib module)
    # and is removed in NumPy 2.0; use the stdlib directly.
    import math
    factorial = math.factorial

    # gamma[i] = (alpha+k)! * (alpha+beta+k+i)! / ((alpha+i)! * (k-i)! * i!)
    gamma = []
    for i in range(k + 1):
        num = factorial(alpha + k) * factorial(alpha + beta + k + i)
        denom = factorial(alpha + i) * factorial(k - i) * factorial(i)
        gamma.append(num / denom)

    # Accumulate poly = sum_i gamma[i] * (coefficients of (x - 1)**i),
    # aligning coefficient lists by left-padding with zeros.
    poly = []
    for i in range(k + 1):
        if i == 0:
            poly.insert(i, gamma[i])
        else:
            # prod = coefficients of (x - 1)**i via repeated convolution;
            # ``conv`` is a helper defined elsewhere in this file.
            prod = [1]
            j = 1
            while j <= i:
                prod = conv(prod, [1, -1])
                j += 1
            while len(poly) < len(prod):
                poly.insert(0, 0)
            prod = [gamma[i] * t for t in prod]
            poly = [sum(pair) for pair in zip(poly, prod)]
    cp = numpy.roots(poly)
    return numpy.sort(cp).tolist()
def theta_est_leaveNout(self, lNo, lNo_samples=None, seed=None, return_samples=False):
    """
    Parameter estimation where N data points are left out of each sample

    Parameters
    ----------
    lNo: int
        Number of data points to leave out for parameter estimation
    lNo_samples: int
        Number of leave-N-out samples.  If lNo_samples=None, the maximum
        number of combinations will be used
    seed: int or None, optional
        Random seed
    return_samples: bool, optional
        Return a list of sample numbers that were left out

    Returns
    -------
    lNo_theta: DataFrame
        Theta values for each sample and (if return_samples = True)
        the sample numbers left out of each estimation
    """
    assert isinstance(lNo, int)
    assert isinstance(lNo_samples, (type(None), int))
    assert isinstance(seed, (type(None), int))
    assert isinstance(return_samples, bool)

    samplesize = len(self._numbers_list) - lNo

    if seed is not None:
        np.random.seed(seed)

    # Samples are drawn without replacement: each sample keeps
    # ``samplesize`` distinct data points and leaves ``lNo`` out.
    global_list = self._get_sample_list(samplesize, lNo_samples, replacement=False)

    # Distribute the samples across MPI ranks (no-op serial fallback when
    # MPI is unavailable, per the mpiu task-manager utility).
    task_mgr = mpiu.ParallelTaskManager(len(global_list))
    local_list = task_mgr.global_to_local_data(global_list)

    # Temporarily shrink numbers_list so theta_est() works over the
    # reduced sample.
    self._numbers_list = list(range(samplesize))

    lNo_theta = list()
    # ROBUSTNESS FIX: restore self._numbers_list even when theta_est()
    # raises, so a failed estimation does not leave the instance with the
    # shrunken list.
    try:
        for idx, sample in local_list:
            objval, thetavals = self.theta_est(bootlist=list(sample))
            # Record which data points were left out of this estimation.
            lNo_s = list(set(range(len(self.callback_data))) - set(sample))
            thetavals['lNo'] = np.sort(lNo_s)
            lNo_theta.append(thetavals)
    finally:
        # Reset numbers_list (back to original)
        self._numbers_list = list(range(len(self.callback_data)))

    # Gather results from all ranks into one DataFrame.
    global_bootstrap_theta = task_mgr.allgather_global_data(lNo_theta)
    lNo_theta = pd.DataFrame(global_bootstrap_theta)

    if not return_samples:
        del lNo_theta['lNo']

    return lNo_theta