Beispiel #1
0
 def subset_pps5(self, nsamp):
     """
     Return a sample of nsamp distinct items from the population, sampled
     without replacement with probability proportional to size (PPS)
     according to Sampford's sampling scheme.

     5-table lookup samplers are used within Sampford's algorithm to
     accelerate the sampling for large populations.

     Setup is staged and cached on the instance: the sampler object is
     built once, the sorted weights are prepared once, and the ratios and
     their lookup tables are rebuilt only when nsamp differs from the
     value used on the previous call.

     If nsamp equals the population size, a copy of all items is returned
     and no random numbers are drawn.

     Raises ValueError if nsamp exceeds the population size, or if nsamp
     times the largest normalized weight exceeds 1.
     """
     # Copy the whole population if nsamp = npopn.
     if nsamp == self.npopn:
         return list(self.items)
     # Reject impossible sizes before touching any RNG or sampler state, so
     # the equal-probability branch is guarded as well as the weighted one.
     if nsamp > self.npopn:
         raise ValueError('nsamp larger than population size!')
     # Hand the current global RNG state to the sampler's generator
     # (presumably NumPy's global state -- confirm against the imports).
     set_rng_state(*random.get_state())
     try:
         if self.equiprob:
             pool = arange(self.npopn)
             indices = equalprob(nsamp, pool)
         else:
             # This part of setup has to be done before any sampling.
             if not self.did_init:
                 print('Initing ppssampler...')
                 self.sampler = _ppssampler(self.weights)
                 self.did_init = True
             # This part has to be done before any sampling w/o replacement.
             if not self.did_Sampford_init:
                 print('Initing wts...')
                 self.sort_indices, self.sort_wts, self.tot_wt = \
                     self.sampler.prepwts(self.weights)
                 self.max_wt = self.sort_wts[0] / self.tot_wt  # Max wt, normed
                 self.nsamp = 0
                 self.did_Sampford_init = True
             # This part has to be done when sample size changes.
             if self.nsamp != nsamp:
                 print('Initing ratios...')
                 if nsamp * self.max_wt > 1:
                     raise ValueError('Sample size too large for PPS sampling!')
                 self.sampler.prepratios(nsamp, self.sort_wts, self.tot_wt)
                 self.sampler.prepratiotables()
                 self.did_Sampford_tables = True
                 # Recorded only after the ratios are ready, so a failed
                 # setup is retried on the next call.
                 self.nsamp = nsamp
             # This may happen if subset_pps is called before subset_pps5.
             if not self.did_Sampford_tables:
                 print('Initing ratio tables...')
                 self.sampler.prepratiotables()
                 self.did_Sampford_tables = True
             # The first returned value is kept on the instance as ntry
             # (presumably the number of attempts -- confirm in the sampler).
             self.ntry, indices = self.sampler.samplenr5()
             # Note the 5-table version returns unsorted indices, so they
             # index self.items directly (no mapping through sort_indices).
         result = [self.items[i] for i in indices]
     finally:
         # Copy the sampler's RNG state back even when sampling raises, so
         # the global generator never lags behind draws already consumed.
         random.set_state(get_rng_state())
     return result
Beispiel #2
0
 def subset_pps5(self, nsamp):
     """
     Return a sample of nsamp distinct items from the population, sampled
     without replacement with probability proportional to size (PPS)
     according to Sampford's sampling scheme.

     5-table lookup samplers are used within Sampford's algorithm to
     accelerate the sampling for large populations.

     Setup is staged and cached on the instance: the sampler object is
     built once, the sorted weights are prepared once, and the ratios and
     their lookup tables are rebuilt only when nsamp differs from the
     value used on the previous call.

     If nsamp equals the population size, a copy of all items is returned
     and no random numbers are drawn.

     Raises ValueError if nsamp exceeds the population size, or if nsamp
     times the largest normalized weight exceeds 1.
     """
     # Copy the whole population if nsamp = npopn.
     if nsamp == self.npopn:
         return list(self.items)
     # Reject impossible sizes before touching any RNG or sampler state, so
     # the equal-probability branch is guarded as well as the weighted one.
     if nsamp > self.npopn:
         raise ValueError('nsamp larger than population size!')
     # Hand the current global RNG state to the sampler's generator
     # (presumably NumPy's global state -- confirm against the imports).
     set_rng_state(*random.get_state())
     try:
         if self.equiprob:
             pool = arange(self.npopn)
             indices = equalprob(nsamp, pool)
         else:
             # Single-argument print(...) and raise E(...) are used below
             # because they behave the same on Python 2 and Python 3.
             # This part of setup has to be done before any sampling.
             if not self.did_init:
                 print('Initing ppssampler...')
                 self.sampler = _ppssampler(self.weights)
                 self.did_init = True
             # This part has to be done before any sampling w/o replacement.
             if not self.did_Sampford_init:
                 print('Initing wts...')
                 self.sort_indices, self.sort_wts, self.tot_wt = \
                     self.sampler.prepwts(self.weights)
                 self.max_wt = self.sort_wts[0]/self.tot_wt  # Max wt, normed
                 self.nsamp = 0
                 self.did_Sampford_init = True
             # This part has to be done when sample size changes.
             if self.nsamp != nsamp:
                 print('Initing ratios...')
                 if nsamp*self.max_wt > 1:
                     raise ValueError('Sample size too large for PPS sampling!')
                 self.sampler.prepratios(nsamp, self.sort_wts, self.tot_wt)
                 self.sampler.prepratiotables()
                 self.did_Sampford_tables = True
                 # Recorded only after the ratios are ready, so a failed
                 # setup is retried on the next call.
                 self.nsamp = nsamp
             # This may happen if subset_pps is called before subset_pps5.
             if not self.did_Sampford_tables:
                 print('Initing ratio tables...')
                 self.sampler.prepratiotables()
                 self.did_Sampford_tables = True
             # The first returned value is kept on the instance as ntry
             # (presumably the number of attempts -- confirm in the sampler).
             self.ntry, indices = self.sampler.samplenr5()
             # Note the 5-table version returns unsorted indices, so they
             # index self.items directly (no mapping through sort_indices).
         result = [self.items[i] for i in indices]
     finally:
         # Copy the sampler's RNG state back even when sampling raises, so
         # the global generator never lags behind draws already consumed.
         random.set_state(get_rng_state())
     return result
Beispiel #3
0
# Draw 100 values from local_rand() only, then print one draw from rand() and
# one from local_rand(); the label says the two are expected to differ here.
for i in range(100):
    local_rand()
# Every print below takes one pre-formatted argument so the output is the
# same under Python 2 and Python 3.
print('Should be dif:  %s %s' % (rand(), local_rand()))
# Load the state reported by get_rng_state() into `random`; after this the
# two generators are expected to produce the same value.
random.set_state(get_rng_state())
print('Should be same: %s %s' % (rand(), local_rand()))
print('')

# Check equal-weight samplers.
print('*** Checking equiprobability samplers ***')
pool = zeros(10)  # Workspace
samp = equalprobi(5, 10, pool)
print('equalprobi: %s' % (samp,))

# Check equalprob with a list input.
# list(...) keeps this a real list on Python 3, where range() is lazy.
pool = list(range(10, 20))
samp = equalprob(5, pool)
print('equalprob: %s' % (samp,))
# Check the original pool is unchanged.
print(pool)

# Check it again with an array.
pool = array(pool)
samp = equalprob(5, pool)
print('equalprob: %s' % (samp,))
print(pool)
print('')

# Check PPS.
print('*** Checking Population samplers ***')
# Population built from five items with no weights passed.
p5 = Population(['a', 'b', 'c', 'd', 'e'])
# Nine weights for use further down the script (not visible in this excerpt).
wts = array([1., .5, .2, 2.5, 1., .5, .7, .8, .7])