Example #1
0
 def subset_pps5(self, nsamp):
     """
     Return a sample of nsamp distinct items from the population, sampled
     without replacement with probability proportional to size (PPS)
     according to Sampford's sampling scheme.

     5-table lookup samplers are used within Sampford's algorithm to
     accelerate the sampling for large populations.

     When nsamp equals the population size, a shallow copy of all items is
     returned and no random numbers are drawn.  For an equiprobable
     population the indices come from equalprob(); otherwise the weighted
     sampler is set up lazily (sampler object, sorted weights, then the
     ratio tables for this nsamp) and stages that are already current are
     skipped.

     Raises ValueError if nsamp exceeds the population size, or if nsamp
     times the largest normalized weight exceeds 1.  Both checks are made
     only on the weighted path, when nsamp differs from the sample size
     the ratios were last prepared for.
     """
     # Copy the whole population if nsamp = npopn.
     if nsamp == self.npopn:
         return list(self.items)
     # Hand NumPy's RNG state to the sampler's generator.  Only the first
     # three fields (the MT state) are passed, using the same split as
     # sample(); any further fields (e.g. a cached normal deviate) are set
     # aside and re-attached when the state is handed back below.
     rng_state = random.get_state()
     mt_state, extra_state = rng_state[:3], rng_state[3:]
     set_rng_state(*mt_state)
     if self.equiprob:
         pool = arange(self.npopn)
         indices = equalprob(nsamp, pool)
     else:
         # This part of setup has to be done before any sampling.
         if not self.did_init:
             print('Initing ppssampler...')
             self.sampler = _ppssampler(self.weights)
             self.did_init = True
         # This part has to be done before any sampling w/o replacement.
         if not self.did_Sampford_init:
             print('Initing wts...')
             self.sort_indices, self.sort_wts, self.tot_wt = \
                 self.sampler.prepwts(self.weights)
             self.max_wt = self.sort_wts[0] / self.tot_wt  # Max wt, normed
             # 0 marks "no ratios prepared yet", forcing the next stage.
             self.nsamp = 0
             self.did_Sampford_init = True
         # This part has to be done when sample size changes.
         if self.nsamp != nsamp:
             print('Initing ratios...')
             if nsamp > self.npopn:
                 raise ValueError('nsamp larger than population size!')
             if nsamp * self.max_wt > 1:
                 raise ValueError('Sample size too large for PPS sampling!')
             self.sampler.prepratios(nsamp, self.sort_wts, self.tot_wt)
             self.sampler.prepratiotables()
             self.did_Sampford_tables = True
             self.nsamp = nsamp
         # This may happen if subset_pps is called before subset_pps5.
         if not self.did_Sampford_tables:
             print('Initing ratio tables...')
             self.sampler.prepratiotables()
             self.did_Sampford_tables = True
         self.ntry, indices = self.sampler.samplenr5()
         # Note the 5-table version returns unsorted indices, so they are
         # used to index self.items directly, without going through
         # self.sort_indices.
     result = [self.items[i] for i in indices]
     # Push the sampler's advanced MT state back into NumPy, re-attaching
     # the fields that were set aside above.
     random.set_state(tuple(get_rng_state()) + tuple(extra_state))
     return result
Example #2
0
 def subset_pps5(self, nsamp):
     """
     Return a sample of nsamp distinct items from the population, sampled
     without replacement with probability proportional to size (PPS)
     according to Sampford's sampling scheme.

     5-table lookup samplers are used within Sampford's algorithm to
     accelerate the sampling for large populations.

     When nsamp equals the population size, a shallow copy of all items is
     returned and no random numbers are drawn.  For an equiprobable
     population the indices come from equalprob(); otherwise the weighted
     sampler is set up lazily (sampler object, sorted weights, then the
     ratio tables for this nsamp) and stages that are already current are
     skipped.

     Raises ValueError if nsamp exceeds the population size, or if nsamp
     times the largest normalized weight exceeds 1.  Both checks are made
     only on the weighted path, when nsamp differs from the sample size
     the ratios were last prepared for.

     The print calls and raise statements are written in the form that is
     valid on both Python 2 and Python 3.
     """
     # Copy the whole population if nsamp = npopn.
     if nsamp == self.npopn:
         return list(self.items)
     # Hand NumPy's RNG state to the sampler's generator.  Only the first
     # three fields (the MT state) are passed, using the same split as
     # sample(); any further fields (e.g. a cached normal deviate) are set
     # aside and re-attached when the state is handed back below.
     rng_state = random.get_state()
     mt_state, extra_state = rng_state[:3], rng_state[3:]
     set_rng_state(*mt_state)
     if self.equiprob:
         pool = arange(self.npopn)
         indices = equalprob(nsamp, pool)
     else:
         # This part of setup has to be done before any sampling.
         if not self.did_init:
             print('Initing ppssampler...')
             self.sampler = _ppssampler(self.weights)
             self.did_init = True
         # This part has to be done before any sampling w/o replacement.
         if not self.did_Sampford_init:
             print('Initing wts...')
             self.sort_indices, self.sort_wts, self.tot_wt = \
                 self.sampler.prepwts(self.weights)
             self.max_wt = self.sort_wts[0]/self.tot_wt  # Max wt, normed
             # 0 marks "no ratios prepared yet", forcing the next stage.
             self.nsamp = 0
             self.did_Sampford_init = True
         # This part has to be done when sample size changes.
         if self.nsamp != nsamp:
             print('Initing ratios...')
             if nsamp > self.npopn:
                 raise ValueError('nsamp larger than population size!')
             if nsamp*self.max_wt > 1:
                 raise ValueError('Sample size too large for PPS sampling!')
             self.sampler.prepratios(nsamp, self.sort_wts, self.tot_wt)
             self.sampler.prepratiotables()
             self.did_Sampford_tables = True
             self.nsamp = nsamp
         # This may happen if subset_pps is called before subset_pps5.
         if not self.did_Sampford_tables:
             print('Initing ratio tables...')
             self.sampler.prepratiotables()
             self.did_Sampford_tables = True
         self.ntry, indices = self.sampler.samplenr5()
         # Note the 5-table version returns unsorted indices, so they are
         # used to index self.items directly, without going through
         # self.sort_indices.
     result = [self.items[i] for i in indices]
     # Push the sampler's advanced MT state back into NumPy, re-attaching
     # the fields that were set aside above.
     random.set_state(tuple(get_rng_state()) + tuple(extra_state))
     return result
Example #3
0
 def sample(self, nsamp):
     """
     Draw nsamp items from the population with replacement and return
     them as a list.

     Only weighted populations are handled so far; an equiprobable
     population raises NotImplementedError.  The underlying sampler is
     built from self.weights the first time it is needed.
     """
     # *** Implement equiprob case.
     if self.equiprob:
         raise NotImplementedError('Awaiting code...')
     if not self.did_init:
         self.sampler = _ppssampler(self.weights)
         self.did_init = True
     # The sampler keeps its own RNG state, so NumPy's state is copied in
     # before drawing and copied back out afterwards.  Only the MT fields
     # (the first three) are used internally; whatever follows them, such
     # as cached normal samples, is passed through untouched.
     numpy_state = random.get_state()
     set_rng_state(*numpy_state[:3])  # *** modify to handle full rng state
     drawn = self.sampler.sample(nsamp)
     # Fresh MT fields from the sampler, followed by the original extras.
     random.set_state(list(get_rng_state()) + list(numpy_state[3:]))
     population = self.items
     return [population[k] for k in drawn]
Example #4
0
 def sample(self, nsamp):
     """
     Return a set of nsamp samples from the population, sampled with
     replacement.

     The result is a list of items.  Only weighted populations are
     handled so far; an equiprobable population raises
     NotImplementedError.  The underlying sampler is built from
     self.weights the first time it is needed.
     """
     # *** Implement equiprob case.
     if self.equiprob:
         # Call form of raise: valid on both Python 2 and Python 3.
         raise NotImplementedError('Awaiting code...')
     if not self.did_init:
         self.sampler = _ppssampler(self.weights)
         self.did_init = True
     # Track the RNG state within the sampler, to update NumPy's RNG state.
     # Internally we only use the MT state; any extra state for cached
     # normal or other samples can just be copied.
     rng_state = random.get_state()
     mt_state, extra_state = rng_state[:3], rng_state[3:]
     set_rng_state(*mt_state)  # *** modify to handle full rng state
     indices = self.sampler.sample(nsamp)
     # Fresh MT fields from the sampler, followed by the original extras.
     new_state = list(get_rng_state())
     new_state.extend(extra_state)
     random.set_state(new_state)
     return [self.items[i] for i in indices]
Example #5
0
from numpy import zeros, array, rand, linspace, random, Float, ones, arange, sum
from _ppssampler import set_rng_state, get_rng_state, local_irand, local_rand
from _ppssampler import equalprob, equalprobi
from population import *
from messages import restart, elapsedcpu

# Check passing RandomKit state back and forth with numpy.
# NumPy's RNG state is handed to the extension module's generator, both
# generators are drawn from side by side, and the extension's state is then
# handed back to NumPy.  The printed labels say which pairs should agree.
print '*** Checking RandomKit state maintenance ***'
state0 = random.get_state()
# NOTE(review): this unpacking only works when the state has exactly three
# fields, and the name `id` shadows the builtin from here on.
id, key, pos = state0
set_rng_state(id, key, pos)
print 'Should be same:', rand(), local_rand()
# Call local_rand() alone 100 times, without touching rand(); presumably this
# advances only the extension's generator so the two streams fall out of step.
for i in range(100):
    local_rand()
print 'Should be dif: ', rand(), local_rand()
# Hand the extension's current state back to NumPy to resynchronize.
random.set_state(get_rng_state())
print 'Should be same:', rand(), local_rand()
print

# Check equal-weight samplers.
print '*** Checking equiprobability samplers ***'
pool = zeros(10)  # Workspace
# NOTE(review): presumably draws 5 of 10 indices using pool as scratch
# space -- confirm against the _ppssampler extension.
samp = equalprobi(5,10,pool)
print 'equalprobi:', samp

# Check equalprob with a list input.
# The print statements make this Python 2 code, where range() returns a list.
pool = range(10,20)
samp = equalprob(5,pool)
print 'equalprob:', samp
# Check the original pool is unchanged.
print pool