def initialize_pyrngs():
    """
    Create one GSL random number generator per OpenMP thread.

    The thread count is taken from the ``OMP_NUM_THREADS`` environment
    variable when it is set, and otherwise from
    ``gslrandom.get_omp_num_threads()``. Every generator is seeded with an
    integer drawn from ``np.random.randint(2**16)``.

    :return: list of ``PyRNG`` objects, one per thread
    :raises ValueError: if ``OMP_NUM_THREADS`` is set but is not an integer
    :raises AssertionError: if the resolved thread count is not positive
    """
    from gslrandom import PyRNG, get_omp_num_threads

    if "OMP_NUM_THREADS" in os.environ:
        # Environment values are always strings; convert before using the
        # value as a count (comparison with 0 and ``size=`` both need an int).
        num_threads = int(os.environ["OMP_NUM_THREADS"])
    else:
        num_threads = get_omp_num_threads()
    assert num_threads > 0, "number of OpenMP threads must be positive"

    # Choose random seeds
    seeds = np.random.randint(2**16, size=num_threads)
    return [PyRNG(seed) for seed in seeds]
def __init__(self, model, T, S, F, minibatchfrac=1.):
    """
    Set up the sparse bookkeeping used to model event parents.

    Only the time bins (rows) in which a process has a nonzero entry in S
    are retained. The parent variables themselves (``_Z`` and ``_EZ``)
    start out as ``None``.

    :param model: object providing ``dt``, ``K`` (number of processes)
                  and ``B`` (number of basis functions)
    :param T: Number of time bins
    :param S: Data matrix (TxK)
    :param F: Filtered data matrix (TxKxB)
    :param minibatchfrac: stored unchanged on the instance
    """
    self.model = model
    self.dt = model.dt
    self.K = model.K
    self.B = model.B

    # TODO: Remove dependencies on S and F
    self.T = T
    self.minibatchfrac = minibatchfrac

    # Sparse versions of S and F: for each process k, keep only the rows
    # where S[:, k] is nonzero.
    event_rows = [np.where(S[:, k])[0] for k in range(self.K)]
    self.ts = event_rows
    self.Ts = [len(rows) for rows in event_rows]
    self.Ss = [S[rows, k].astype(np.uint32)
               for k, rows in enumerate(event_rows)]
    self.Fs = [F[rows] for rows in event_rows]
    # Total event count per process, summed in the dtype of S
    self.Ns = np.array([S[rows, k].sum()
                        for k, rows in enumerate(event_rows)])

    # The base class handles the parent variables. We use a sparse
    # representation that only considers times (rows) where there is a spike.
    self._Z = None
    self._EZ = None

    # GSL RNGs for resampling Z: at least one, otherwise one per OpenMP thread
    from gslrandom import PyRNG, get_omp_num_threads
    n_rngs = max(1, get_omp_num_threads())
    rng_seeds = np.random.randint(2**16, size=n_rngs)
    self.pyrngs = [PyRNG(rng_seed) for rng_seed in rng_seeds]
def test_one_rng_one_N_one_p_no_out():
    """
    One RNG, scalar N, one probability vector, no ``out`` buffer.

    Checks that every draw sums to N and that the accumulated counts,
    once normalized, are within 1e-2 of the uniform probabilities.
    """
    K = 5
    N = 10
    p_K = np.ones(K) / K
    rng = PyRNG(rn.randint(2**16))

    n_iter = 10000
    z_K = np.zeros(K)
    # ``range`` instead of ``xrange`` so the test also runs on Python 3
    for _ in range(n_iter):
        n_K = multinomial(rng, N, p_K)
        assert n_K.sum() == N
        z_K += n_K
    assert np.allclose(z_K / z_K.sum(), p_K, atol=1e-2)
def test_one_rng_one_N_multi_p_no_out():
    """
    One RNG, scalar N, a stack of I probability vectors, no ``out`` buffer.

    Checks the shape of each draw, that every row sums to N, and that the
    row-normalized accumulated counts are within 1e-2 of ``p_IK``.
    """
    K = 5
    I = 3
    N = 10
    p_IK = np.ones((I, K)) / K
    p_IK[0, :] = [0.5, 0.25, 0.05, 0.1, 0.1]  # make one non-uniform
    rng = PyRNG(rn.randint(2**16))

    n_iter = 10000
    z_IK = np.zeros((I, K))
    # ``range`` instead of ``xrange`` so the test also runs on Python 3
    for _ in range(n_iter):
        n_IK = multinomial(rng, N, p_IK)
        assert n_IK.shape == (I, K)
        assert (n_IK.sum(axis=1) == N).all()
        z_IK += n_IK

    norm_z_IK = z_IK.astype(float) / np.sum(z_IK, axis=1, keepdims=True)
    assert np.allclose(norm_z_IK, p_IK, atol=1e-2)
def test_one_rng_multi_N_one_p_with_out():
    """
    One RNG, a vector of I counts, one probability vector, with ``out``.

    Checks that each row written to the ``out`` buffer sums to the
    corresponding entry of ``N_I`` and that the row-normalized accumulated
    counts are within 1e-2 of the uniform probabilities.
    """
    K = 5
    I = 10
    N_I = np.ones(I) * 10
    p_K = np.ones(K) / K
    rng = PyRNG(rn.randint(2**16))

    n_iter = 10000
    z_IK = np.zeros((I, K))
    # ``range`` instead of ``xrange`` so the test also runs on Python 3
    for _ in range(n_iter):
        n_IK = np.zeros((I, K))
        multinomial(rng, N_I, p_K, out=n_IK)
        assert np.allclose(n_IK.sum(axis=1), N_I)
        z_IK += n_IK

    norm_z_IK = z_IK.astype(float) / np.sum(z_IK, axis=1, keepdims=True)
    # Broadcast the single probability vector to one row per count
    p_IK = np.ones((I, K)) * p_K
    assert np.allclose(norm_z_IK, p_IK, atol=1e-2)
def test_one_rng_multi_N_multi_p_with_out():
    """
    One RNG, a vector of I counts, I probability vectors, with ``out``.

    Checks that each row written to the ``out`` buffer sums to the
    corresponding entry of ``N_I`` and that the row-normalized accumulated
    counts are within 1e-2 of ``p_IK``.
    """
    K = 5
    I = 3
    N_I = np.arange(1, I + 1) * 10
    p_IK = np.ones((I, K)) / K
    p_IK[0, :] = [0.5, 0.25, 0.05, 0.1, 0.1]  # make one non-uniform
    rng = PyRNG(rn.randint(2**16))

    n_iter = 10000
    z_IK = np.zeros((I, K))
    # ``range`` instead of ``xrange`` so the test also runs on Python 3
    for _ in range(n_iter):
        n_IK = np.zeros((I, K))
        multinomial(rng, N_I, p_IK, out=n_IK)
        # The comparison result was previously discarded; assert it so a
        # wrong row total actually fails the test.
        assert np.allclose(n_IK.sum(axis=1), N_I)
        z_IK += n_IK

    norm_z_IK = z_IK.astype(float) / np.sum(z_IK, axis=1, keepdims=True)
    assert np.allclose(norm_z_IK, p_IK, atol=1e-2)
def test_multi_rng_multi_N_multi_p_with_out():
    """
    A list of RNGs, an AxB array of counts, AxBxK probabilities, with ``out``.

    One RNG is created per value reported by ``get_omp_num_threads()``.
    Checks that the counts written to the ``out`` buffer sum over the last
    axis to ``N_AB`` and that the normalized accumulated counts are within
    1e-2 of ``p_ABK``.
    """
    K = 5
    A = 3
    B = 2
    N_AB = (np.arange(1, A * B + 1) * 10).reshape((A, B))
    p_ABK = np.ones((A, B, K)) / K
    p_ABK[0, 1, :] = [0.5, 0.25, 0.05, 0.1, 0.1]  # make one non-uniform
    p_ABK[1, 0, :] = [0.9, 0.05, 0.03, 0.01, 0.01]  # make one really non-uniform
    # ``range`` instead of ``xrange`` so the test also runs on Python 3
    rngs = [PyRNG(rn.randint(2**16)) for _ in range(get_omp_num_threads())]

    n_iter = 10000
    z_ABK = np.zeros((A, B, K))
    for _ in range(n_iter):
        n_ABK = np.zeros((A, B, K), dtype=np.uint32)
        multinomial(rngs, N_AB, p_ABK, out=n_ABK)
        # The comparison result was previously discarded; assert it so a
        # wrong total actually fails the test.
        assert np.allclose(n_ABK.sum(axis=-1), N_AB)
        z_ABK += n_ABK

    norm_z_ABK = z_ABK.astype(float) / np.sum(z_ABK, axis=-1, keepdims=True)
    assert np.allclose(norm_z_ABK, p_ABK, atol=1e-2)
# Timing comparison of three ways to fill an I x K multinomial count array:
# seeded_multinomial (no PyRNG), multinomial with one PyRNG per OpenMP
# thread, and multinomial with a single PyRNG. After each run the row sums
# of the output are checked against the requested counts.
import time

I = 100000
K = 1000

N_I = rn.poisson(rn.gamma(2, 500, size=I)).astype(np.uint32)
# ``np.float`` was only an alias of the builtin ``float`` and has been
# removed from NumPy; name the 64-bit float dtype explicitly.
P_IK = np.full((I, K), 1. / K, dtype=np.float64)

N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
seeded_multinomial(N_I, P_IK, N_IK)
# print(...) with a single pre-formatted string behaves the same on
# Python 2 and Python 3.
print('%fs: No PyRNG parallel version with %d cores'
      % (time.time() - s, get_omp_num_threads()))
assert (N_IK.sum(axis=1) == N_I).all()

rngs = [PyRNG(rn.randint(2**16)) for _ in range(get_omp_num_threads())]
N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
multinomial(rngs, N_I, P_IK, N_IK)
print('%fs: PyRNG parallel version with %d cores'
      % (time.time() - s, len(rngs)))
assert (N_IK.sum(axis=1) == N_I).all()

rng = PyRNG(rn.randint(2**16))
N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
multinomial(rng, N_I, P_IK, N_IK)
print('%fs: PyRNG version with 1 core' % (time.time() - s))
assert (N_IK.sum(axis=1) == N_I).all()

rng = PyRNG()
def coupling_random_sample(n, l, s, threads):
    """
    Run ``multinomial`` over ``s`` stacked copies of ``l``.

    An uninitialized ``(s, len(l))`` uint32 array is allocated as the
    output buffer, ``l`` is tiled into ``s`` identical rows, and a list of
    ``threads`` freshly seeded ``PyRNG`` objects is created. All of these,
    together with ``n``, are handed to ``multinomial``.

    :param n: count argument forwarded unchanged to ``multinomial``
    :param l: sequence repeated as each row of the second ``multinomial``
              input (presumably probabilities -- confirm against callers)
    :param s: number of rows, i.e. number of copies of ``l``
    :param threads: number of ``PyRNG`` objects to create
    :return: whatever ``multinomial`` returns
    """
    out_shape = (s, len(l))
    samples = np.empty(out_shape, dtype=np.uint32)
    tiled = np.tile(l, (s, 1))

    # One independently seeded generator per requested thread
    generators = []
    for _ in range(threads):
        generators.append(PyRNG(np.random.randint(2**16)))

    return multinomial(generators, n, tiled, samples)
def __init__(self, model, T, S, F):
    """
    Set up the sparse bookkeeping used to model event parents.

    Only the time bins (rows) in which a process has a nonzero entry in S
    are retained. The parent variables themselves (``_Z`` and ``_EZ``)
    start out as ``None``.

    If ``gslrandom`` can be imported, one ``PyRNG`` per thread is stored in
    ``self.pyrngs`` and ``self.USE_GSL`` is set to True. Otherwise a warning
    is issued and ``self.USE_GSL`` is set to False.

    :param model: object providing ``dt``, ``K`` (number of processes)
                  and ``B`` (number of basis functions)
    :param T: Number of time bins
    :param S: Data matrix (TxK)
    :param F: Filtered data matrix (TxKxB)
    """
    self.model = model
    self.dt = model.dt
    self.K = model.K
    self.B = model.B

    # TODO: Remove dependencies on S and F
    self.T = T
    self.S = S
    self.F = F

    # Save sparse versions of S and F
    self.ts = []
    self.Ts = []
    self.Ns = []
    self.Ss = []
    self.Fs = []
    for k in range(self.K):
        # Find the rows where S[:,k] is nonzero
        tk = np.where(S[:, k])[0]
        counts = S[tk, k]
        self.ts.append(tk)
        self.Ts.append(len(tk))
        self.Ss.append(counts.astype(np.uint32))
        self.Ns.append(counts.sum())
        self.Fs.append(F[tk])
    self.Ns = np.array(self.Ns)

    # The base class handles the parent variables
    # We use a sparse representation that only considers times (rows)
    # where there is a spike
    self._Z = None
    self._EZ = None

    # Initialize GSL RNGs for resampling Z. Only a failed import triggers
    # the pure-python fallback; other errors are no longer swallowed and
    # misreported as an import problem.
    try:
        from gslrandom import PyRNG, get_omp_num_threads
    except ImportError:
        warn(
            "Failed to import gslrandom for parallel multinomial sampling. "
            "Defaulting to pure python instead. "
            "This will have a significant impact on performance. "
            "To install gslrandom, see https://github.com/slinderman/gslrandom"
        )
        self.USE_GSL = False
    else:
        try:
            # Environment values are strings; using the raw string as a
            # count used to raise inside the old catch-all handler and
            # silently disable GSL whenever OMP_NUM_THREADS was set.
            num_threads = int(os.environ["OMP_NUM_THREADS"])
        except (KeyError, ValueError):
            # Unset or not a plain integer: ask the OpenMP runtime instead
            num_threads = get_omp_num_threads()
        # Always keep at least one generator
        num_threads = max(1, num_threads)

        # Choose random seeds
        seeds = np.random.randint(2**16, size=num_threads)
        self.pyrngs = [PyRNG(seed) for seed in seeds]
        self.USE_GSL = True