def subset_pps5(self, nsamp):
    """
    Return a sample of nsamp distinct items from the population, sampled
    without replacement with probability proportional to size (PPS)
    according to Sampford's sampling scheme.

    5-table lookup samplers are used within Sampford's algorithm to
    accelerate the sampling for large populations.

    If nsamp equals the population size, a new list holding every item is
    returned and no random numbers are consumed.  Otherwise the sampler
    and its tables are built lazily, cached on the instance, and the
    ratio tables are rebuilt only when nsamp differs from the value used
    on the previous call.

    Raises ValueError (weighted populations only) if nsamp exceeds the
    population size, or if nsamp times the largest normalized weight
    exceeds 1.
    """
    # Copy the whole population if nsamp = npopn.
    if nsamp == self.npopn:
        return list(self.items)

    # Hand NumPy's RNG state to the sampler's generator.  As in `sample`,
    # only the first three entries (the MT state) are passed on; whatever
    # follows them is set aside and re-attached when the state is restored,
    # so it is neither pushed into set_rng_state nor lost afterwards.
    rng_state = random.get_state()
    mt_state, extra_state = rng_state[:3], rng_state[3:]
    set_rng_state(*mt_state)

    if self.equiprob:
        pool = arange(self.npopn)
        indices = equalprob(nsamp, pool)
    else:
        # This part of setup has to be done before any sampling.
        if not self.did_init:
            print('Initing ppssampler...')
            self.sampler = _ppssampler(self.weights)
            self.did_init = True
        # This part has to be done before any sampling w/o replacement.
        if not self.did_Sampford_init:
            print('Initing wts...')
            self.sort_indices, self.sort_wts, self.tot_wt = \
                self.sampler.prepwts(self.weights)
            self.max_wt = self.sort_wts[0] / self.tot_wt  # Max wt, normed
            # Zero marks "no ratios prepared yet" for the check below.
            self.nsamp = 0
            self.did_Sampford_init = True
        # This part has to be done when sample size changes.
        if self.nsamp != nsamp:
            print('Initing ratios...')
            if nsamp > self.npopn:
                raise ValueError('nsamp larger than population size!')
            if nsamp * self.max_wt > 1:
                raise ValueError('Sample size too large for PPS sampling!')
            self.sampler.prepratios(nsamp, self.sort_wts, self.tot_wt)
            self.sampler.prepratiotables()
            self.did_Sampford_tables = True
            # Recorded only after validation, so a rejected nsamp is
            # re-checked on the next call.
            self.nsamp = nsamp
        # This may happen if subset_pps is called before subset_pps5.
        if not self.did_Sampford_tables:
            print('Initing ratio tables...')
            self.sampler.prepratiotables()
            self.did_Sampford_tables = True
        self.ntry, indices = self.sampler.samplenr5()
        # Note the 5-table version returns unsorted indices, so they are
        # used on self.items directly (no mapping through sort_indices).

    result = [self.items[i] for i in indices]

    # Push the advanced MT state back into NumPy, with the untouched extra
    # entries appended.
    new_state = list(get_rng_state())
    new_state.extend(extra_state)
    random.set_state(new_state)
    return result
def subset_pps5(self, nsamp):
    """
    Return a sample of nsamp distinct items from the population, sampled
    without replacement with probability proportional to size (PPS)
    according to Sampford's sampling scheme.

    5-table lookup samplers are used within Sampford's algorithm to
    accelerate the sampling for large populations.

    If nsamp equals the population size, a new list holding every item is
    returned and no random numbers are consumed.  Otherwise the sampler
    and its tables are built lazily, cached on the instance, and the
    ratio tables are rebuilt only when nsamp differs from the value used
    on the previous call.

    Raises ValueError (weighted populations only) if nsamp exceeds the
    population size, or if nsamp times the largest normalized weight
    exceeds 1.
    """
    # Copy the whole population if nsamp = npopn.
    if nsamp == self.npopn:
        return list(self.items)

    # Hand NumPy's RNG state to the sampler's generator.  As in `sample`,
    # only the first three entries (the MT state) are passed on; whatever
    # follows them is set aside and re-attached when the state is restored,
    # so it is neither pushed into set_rng_state nor lost afterwards.
    rng_state = random.get_state()
    mt_state, extra_state = rng_state[:3], rng_state[3:]
    set_rng_state(*mt_state)

    if self.equiprob:
        pool = arange(self.npopn)
        indices = equalprob(nsamp, pool)
    else:
        # This part of setup has to be done before any sampling.
        # (Diagnostics use the call form of print and raise, which is
        # valid on both Python 2 and Python 3.)
        if not self.did_init:
            print('Initing ppssampler...')
            self.sampler = _ppssampler(self.weights)
            self.did_init = True
        # This part has to be done before any sampling w/o replacement.
        if not self.did_Sampford_init:
            print('Initing wts...')
            self.sort_indices, self.sort_wts, self.tot_wt = \
                self.sampler.prepwts(self.weights)
            self.max_wt = self.sort_wts[0] / self.tot_wt  # Max wt, normed
            # Zero marks "no ratios prepared yet" for the check below.
            self.nsamp = 0
            self.did_Sampford_init = True
        # This part has to be done when sample size changes.
        if self.nsamp != nsamp:
            print('Initing ratios...')
            if nsamp > self.npopn:
                raise ValueError('nsamp larger than population size!')
            if nsamp * self.max_wt > 1:
                raise ValueError('Sample size too large for PPS sampling!')
            self.sampler.prepratios(nsamp, self.sort_wts, self.tot_wt)
            self.sampler.prepratiotables()
            self.did_Sampford_tables = True
            # Recorded only after validation, so a rejected nsamp is
            # re-checked on the next call.
            self.nsamp = nsamp
        # This may happen if subset_pps is called before subset_pps5.
        if not self.did_Sampford_tables:
            print('Initing ratio tables...')
            self.sampler.prepratiotables()
            self.did_Sampford_tables = True
        self.ntry, indices = self.sampler.samplenr5()
        # Note the 5-table version returns unsorted indices, so they are
        # used on self.items directly (no mapping through sort_indices).

    result = [self.items[i] for i in indices]

    # Push the advanced MT state back into NumPy, with the untouched extra
    # entries appended.
    new_state = list(get_rng_state())
    new_state.extend(extra_state)
    random.set_state(new_state)
    return result
def sample(self, nsamp):
    """
    Draw nsamp items from the population, with replacement.

    Only weighted populations are handled; an equal-probability
    population raises NotImplementedError.  The underlying sampler is
    created from self.weights on first use and cached on the instance.

    Returns a list of the drawn items, looked up in self.items.
    """
    # *** Implement equiprob case.
    if self.equiprob:
        raise NotImplementedError('Awaiting code...')

    # First call: build and remember the sampler.
    if not self.did_init:
        self.sampler = _ppssampler(self.weights)
        self.did_init = True

    # The sampler works from the MT portion of NumPy's RNG state (the
    # first three entries).  Anything after that -- e.g. cached normal
    # samples -- is not used internally and is simply carried across.
    full_state = random.get_state()
    mt_part = full_state[:3]
    leftover = full_state[3:]
    set_rng_state(*mt_part)  # *** modify to handle full rng state

    drawn = self.sampler.sample(nsamp)

    # Write the advanced MT state back to NumPy, followed by the entries
    # that were set aside above.
    random.set_state(list(get_rng_state()) + list(leftover))

    return [self.items[pick] for pick in drawn]
def sample(self, nsamp):
    """
    Return a set of nsamp samples from the population, sampled with
    replacement.

    Only weighted populations are supported; NotImplementedError is
    raised when self.equiprob is true.  The sampler is built from
    self.weights on the first call and cached on the instance.

    Returns a list of the sampled items, looked up in self.items.
    """
    # *** Implement equiprob case.
    if self.equiprob:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise NotImplementedError('Awaiting code...')
    if not self.did_init:
        self.sampler = _ppssampler(self.weights)
        self.did_init = True
    # Track the RNG state within the sampler, to update NumPy's RNG state.
    # Internally we only use the MT state; any extra state for cached
    # normal or other samples can just be copied.
    rng_state = random.get_state()
    mt_state, extra_state = rng_state[:3], rng_state[3:]
    set_rng_state(*mt_state)  # *** modify to handle full rng state
    indices = self.sampler.sample(nsamp)
    # Rebuild the full state: advanced MT entries first, then the extra
    # entries exactly as they were read.
    new_state = list(get_rng_state())
    new_state.extend(extra_state)
    random.set_state(new_state)
    return [self.items[i] for i in indices]