def test_consistency_GPUA_parallel():
    # Check that GPUA_mrg_uniform, when sampling several substreams in
    # parallel, reproduces the numbers of the reference (Java)
    # implementation by L'Ecuyer et al.
    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7  # one sample per substream is drawn on every call

    samples = []
    stream_start = np.array([seed] * 6, dtype="int32")

    for _stream in range(n_streams):
        # Collect the state of every substream of the current stream; each
        # state is derived from the previous one with ff_2p72.
        substream_states = [stream_start.copy()]
        while len(substream_states) < n_substreams:
            substream_states.append(rng_mrg.ff_2p72(substream_states[-1]))

        # Transfer the stacked states to the device
        rstate = gpuarray_shared_constructor(np.asarray(substream_states))

        new_rstate, sample = GPUA_mrg_uniform.new(
            rstate, ndim=None, dtype="float32", size=(n_substreams,)
        )
        rstate.default_update = new_rstate

        # Not strictly required; this only mimics what
        # rng_mrg.MRG_RandomStream does
        sample.rstate = rstate
        sample.update = (rstate, new_rstate)

        # The sample has to be brought back to main memory
        cpu_sample = at.as_tensor_variable(sample)
        f = aesara.function([], cpu_sample, mode=mode)

        # Each call contributes one entry per substream.
        stream_samples = [f() for _draw in range(n_samples)]

        # Transpose before flattening so that the draws belonging to the
        # same substream end up next to each other.
        samples.append(np.array(stream_samples).T.flatten())

        # Move on to the next stream
        stream_start = rng_mrg.ff_2p134(stream_start)

    samples = np.array(samples).flatten()
    assert np.allclose(samples, java_samples)
def test_consistency_GPUA_serial():
    # Check that GPUA_mrg_uniform, when sampling one substream at a time,
    # reproduces the numbers of the reference (Java) implementation by
    # L'Ecuyer et al.
    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7

    samples = []
    stream_start = np.array([seed] * 6, dtype="int32")

    for _stream in range(n_streams):
        substream_start = stream_start.copy()
        for _substream in range(n_substreams):
            # Wrap the single state in an outer list so the array gets a
            # leading dimension of length one, then transfer it to the device
            host_state = np.array([substream_start.copy()], dtype="int32")
            rstate = gpuarray_shared_constructor(host_state)

            new_rstate, sample = GPUA_mrg_uniform.new(
                rstate, ndim=None, dtype="float32", size=(1,)
            )
            rstate.default_update = new_rstate

            # Not strictly required; this only mimics what
            # rng_mrg.MRG_RandomStream does
            sample.rstate = rstate
            sample.update = (rstate, new_rstate)

            # The sample has to be brought back to main memory
            # NOTE(review): the parallel variant of this test calls
            # `at.as_tensor_variable`; confirm that `tensor` is actually
            # bound in this module.
            cpu_sample = tensor.as_tensor_variable(sample)
            f = aesara.function([], cpu_sample, mode=mode)

            # Draw n_samples values from this substream, one per call
            samples.extend(f() for _draw in range(n_samples))

            # Move on to the next substream
            substream_start = rng_mrg.ff_2p72(substream_start)

        # Move on to the next stream
        stream_start = rng_mrg.ff_2p134(stream_start)

    samples = np.array(samples).flatten()
    assert np.allclose(samples, java_samples)
def test_GPUA_full_fill():
    # Ensure the entire sample buffer gets filled, and that large samples
    # agree with the CPU results.

    # The size has to be large for the problem to show up on the GPU
    size = (10, 1000)

    # CPU reference graph
    cpu_stream = MRG_RandomStream(234)
    cpu_uniform = cpu_stream.uniform(size, nstreams=60 * 256)
    f_cpu = aesara.function([], cpu_uniform)

    # Build the GPU graph from the value of the state variable in the most
    # recent entry of the CPU stream's state_updates, read before either
    # function has been called.
    initial_state = cpu_stream.state_updates[-1][0].get_value()
    rstate_gpu = gpuarray_shared_constructor(initial_state)
    new_rstate, sample = GPUA_mrg_uniform.new(
        rstate_gpu, ndim=None, dtype="float32", size=size
    )
    rstate_gpu.default_update = new_rstate
    f_gpu = aesara.function([], sample, mode=mode)

    # Evaluate the CPU function first, then the GPU one
    cpu_values = f_cpu()
    gpu_values = f_gpu()
    utt.assert_allclose(cpu_values, gpu_values)