def subset_pps5(self, nsamp):
    """
    Draw nsamp distinct items from the population without replacement,
    with probability proportional to size (PPS), following Sampford's
    sampling scheme.

    5-table lookup samplers are used inside Sampford's algorithm to speed
    up sampling from large populations.

    Setup is cached on the instance and redone only when needed: the
    sampler object is built once, the weights are prepared once, and the
    ratios plus ratio tables are rebuilt whenever nsamp differs from the
    value used on the previous call.

    Returns a new list of the selected items.

    Raises ValueError on the PPS path, when the sample size changes, if
    nsamp exceeds the population size or if nsamp times the largest
    normalized weight exceeds 1.
    """
    # Asking for the whole population: nothing to sample, return a copy.
    if nsamp == self.npopn:
        return list(self.items)

    # Pass the state from random.get_state() to set_rng_state() before
    # drawing; the reverse hand-off happens just before returning.
    # NOTE(review): presumably this keeps the sampler's own generator in
    # step with the global one -- both helpers are defined elsewhere.
    set_rng_state(*random.get_state())

    if not self.equiprob:
        # One-time construction of the underlying sampler.
        if not self.did_init:
            print('Initing ppssampler...')
            self.sampler = _ppssampler(self.weights)
            self.did_init = True

        # One-time weight preparation, required before any sampling
        # without replacement.
        if not self.did_Sampford_init:
            print('Initing wts...')
            prepared = self.sampler.prepwts(self.weights)
            self.sort_indices, self.sort_wts, self.tot_wt = prepared
            # Largest weight, normalized by the total.
            self.max_wt = self.sort_wts[0] / self.tot_wt
            # Zero here makes the sample-size stage below run next.
            self.nsamp = 0
            self.did_Sampford_init = True

        # Ratios (and their tables) depend on the sample size, so they
        # are rebuilt whenever it changes.
        if self.nsamp != nsamp:
            print('Initing ratios...')
            if nsamp > self.npopn:
                raise ValueError('nsamp larger than population size!')
            if nsamp * self.max_wt > 1:
                raise ValueError('Sample size too large for PPS sampling!')
            self.sampler.prepratios(nsamp, self.sort_wts, self.tot_wt)
            self.sampler.prepratiotables()
            self.did_Sampford_tables = True
            self.nsamp = nsamp

        # Ratios may already be current while the tables are missing,
        # e.g. if subset_pps was called before subset_pps5.
        if not self.did_Sampford_tables:
            print('Initing ratio tables...')
            self.sampler.prepratiotables()
            self.did_Sampford_tables = True

        # Note the 5-table version returns unsorted indices, so they are
        # used on self.items directly, with no remapping through
        # self.sort_indices.
        self.ntry, chosen = self.sampler.samplenr5()
    else:
        # Equal weights: plain equiprobable draw over all positions.
        chosen = equalprob(nsamp, arange(self.npopn))

    picked = [self.items[pos] for pos in chosen]
    random.set_state(get_rng_state())
    return picked
def subset_pps5(self, nsamp):
    """
    Return a sample of nsamp distinct items from the population, sampled
    without replacement with probability proportional to size (PPS)
    according to Sampford's sampling scheme.

    5-table lookup samplers are used within Sampford's algorithm to
    accelerate the sampling for large populations.

    Setup results are cached on the instance, so repeated calls with the
    same nsamp only pay for the draw itself.

    Raises ValueError (on the PPS path, when the sample size changes) if
    nsamp is larger than the population or if nsamp times the largest
    normalized weight exceeds 1.
    """
    # Copy the whole population if nsamp = npopn.
    if nsamp == self.npopn:
        return list(self.items)

    # Pass the state from random.get_state() to set_rng_state() before
    # sampling; the reverse hand-off is done just before returning.
    set_rng_state(*random.get_state())
    if self.equiprob:
        pool = arange(self.npopn)
        indices = equalprob(nsamp, pool)
    else:
        # This part of setup has to be done before any sampling.
        if not self.did_init:
            print('Initing ppssampler...')
            self.sampler = _ppssampler(self.weights)
            self.did_init = True
        # This part has to be done before any sampling w/o replacement.
        if not self.did_Sampford_init:
            print('Initing wts...')
            self.sort_indices, self.sort_wts, self.tot_wt = \
                self.sampler.prepwts(self.weights)
            self.max_wt = self.sort_wts[0] / self.tot_wt  # Max wt, normed
            self.nsamp = 0
            self.did_Sampford_init = True
        # This part has to be done when sample size changes.
        if self.nsamp != nsamp:
            print('Initing ratios...')
            if nsamp > self.npopn:
                raise ValueError('nsamp larger than population size!')
            if nsamp * self.max_wt > 1:
                raise ValueError('Sample size too large for PPS sampling!')
            self.sampler.prepratios(nsamp, self.sort_wts, self.tot_wt)
            self.sampler.prepratiotables()
            self.did_Sampford_tables = True
            self.nsamp = nsamp
        # This may happen if subset_pps is called before subset_pps5.
        if not self.did_Sampford_tables:
            print('Initing ratio tables...')
            self.sampler.prepratiotables()
            self.did_Sampford_tables = True
        self.ntry, indices = self.sampler.samplenr5()
        # Note the 5-table version returns unsorted indices.
        # indices = [self.sort_indices[i] for i in sindices]
    result = [self.items[i] for i in indices]
    random.set_state(get_rng_state())
    return result
# Advance the local generator on its own; the labels below indicate that it
# should then disagree with the global one.
for i in range(100):
    local_rand()
print('Should be dif: ', rand(), local_rand())
# Copy the local generator's state into the global generator; the two are
# then expected to produce the same value.
random.set_state(get_rng_state())
print('Should be same:', rand(), local_rand())
print()

# Check equal-weight samplers.
print('*** Checking equiprobability samplers ***')
pool = zeros(10)  # Workspace
samp = equalprobi(5, 10, pool)
print('equalprobi:', samp)

# Check equalprob with a list input.  list() is needed because range()
# is not a list on Python 3.
pool = list(range(10, 20))
samp = equalprob(5, pool)
print('equalprob:', samp)
# Check the original pool is unchanged.
print(pool)

# Check it again with an array.
pool = array(pool)
samp = equalprob(5, pool)
print('equalprob:', samp)
print(pool)
print()

# Check PPS.
print('*** Checking Population samplers ***')
p5 = Population(['a', 'b', 'c', 'd', 'e'])
wts = array([1., .5, .2, 2.5, 1., .5, .7, .8, .7])