def test_select_proportional_to_weight(self):
    """Check that selection frequencies track the given probabilities.

    Draws `n_selected` indices out of `n_elements` many times with
    ChoiceFromUniform, accumulates how often each index was picked, and
    asserts that the normalized pick frequencies differ from the input
    probabilities by less than `mean_rtol` on average.
    """
    # Symbolic inputs: probability matrix, uniform samples, sample count.
    sym_pvals = fmatrix()
    sym_unis = fvector()
    sym_count = iscalar()
    sampler = multinomial.ChoiceFromUniform(odtype="auto")
    draw = function(
        [sym_pvals, sym_unis, sym_count],
        sampler(sym_pvals, sym_unis, sym_count),
        allow_input_downcast=True,
    )

    n_elements = 100
    n_selected = 10
    mean_rtol = 0.0005
    n_reps = 10000

    # Fixed seed so the statistical check is reproducible.
    np.random.seed(12345)
    pvals = np.random.randint(1, 100, (1, n_elements))
    pvals = pvals.astype(config.floatX)
    pvals /= pvals.sum(1)

    hit_counts = np.zeros((n_elements,), dtype=config.floatX)
    for _ in range(n_reps):
        uni = np.random.rand(n_selected).astype(config.floatX)
        picked = np.squeeze(draw(pvals, uni, n_selected))
        # Fancy-index increment: each picked index gains one hit per draw.
        hit_counts[picked] += 1

    # Turn raw hit counts into empirical selection frequencies.
    hit_counts /= hit_counts.sum()
    avg_diff = np.mean(np.abs(hit_counts - pvals))
    assert avg_diff < mean_rtol, avg_diff
def test_gpu_opt_wor():
    """Check that ChoiceFromUniform is replaced by its GPU op.

    Covers the case where the op is put on the GPU because its output is
    moved to the GPU, for both `replace=False` and `replace=True`, and
    for both a general matrix input and a single-row input (the row case
    was failing in the past).
    """

    def _has_gpu_choice(compiled):
        # True when the compiled graph contains the GPU choice op.
        return any(
            type(node.op) is GPUAChoiceFromUniform
            for node in compiled.maker.fgraph.toposort()
        )

    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()

    for replace in [False, True]:
        # --- general matrix input ---
        chooser = multinomial.ChoiceFromUniform(odtype="auto", replace=replace)
        m = chooser(p, u, n)
        assert m.dtype == "int64", m.dtype

        f = function([p, u, n], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert _has_gpu_choice(f)

        n_samples = 3
        pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        # One uniform value (all 0.5) per requested sample per row.
        uval = np.ones(pval.shape[0] * n_samples) * 0.5
        f(pval, uval, n_samples)

        # --- single-row input; it was failing in the past ---
        r = tensor.frow()
        m = multinomial.ChoiceFromUniform("auto", replace=replace)(r, u, n)
        assert m.dtype == "int64", m.dtype

        f = function([r, u, n], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert _has_gpu_choice(f)

        pval = np.arange(1 * 4, dtype="float32").reshape((1, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = np.ones_like(pval[:, 0]) * 0.5
        f(pval, uval, 1)
def test_fail_select_alot(self):
    """Check that oversampling is rejected.

    Asking ChoiceFromUniform for more elements (200) than there are
    outcomes (100) must raise ``ValueError``.
    """
    sym_pvals = fmatrix()
    sym_unis = fvector()
    sym_count = iscalar()
    sampler = multinomial.ChoiceFromUniform(odtype="auto")
    draw = function(
        [sym_pvals, sym_unis, sym_count],
        sampler(sym_pvals, sym_unis, sym_count),
        allow_input_downcast=True,
    )

    n_elements = 100
    n_selected = 200  # deliberately larger than n_elements

    np.random.seed(12345)
    uni = np.random.rand(n_selected).astype(config.floatX)
    pvals = np.random.randint(1, 100, (1, n_elements))
    pvals = pvals.astype(config.floatX)
    pvals /= pvals.sum(1)

    with pytest.raises(ValueError):
        draw(pvals, uni, n_selected)
def test_select_distinct(self):
    """Check that ChoiceFromUniform always selects distinct elements.

    For several sample counts (up to and including the full number of
    outcomes) the result must contain exactly the requested number of
    indices, all of them different, and all within
    ``range(n_elements)``.
    """
    p = fmatrix()
    u = fvector()
    n = iscalar()
    m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)
    f = function([p, u, n], m, allow_input_downcast=True)

    n_elements = 1000
    all_indices = range(n_elements)
    np.random.seed(12345)
    for i in [5, 10, 50, 100, 500, n_elements]:
        uni = np.random.rand(i).astype(config.floatX)
        pvals = np.random.randint(1, 100, (1, n_elements))
        pvals = pvals.astype(config.floatX)
        pvals /= pvals.sum(1)
        res = np.squeeze(f(pvals, uni, i))
        assert len(res) == i, res
        unique_res = np.unique(res)
        # The actual distinctness check: a repeated index would shrink
        # the set of unique values below the number requested.
        assert len(unique_res) == i, res
        assert np.all(np.in1d(unique_res, all_indices)), res
def choice(
    self,
    size=1,
    a=None,
    replace=True,
    p=None,
    ndim=None,
    dtype="int64",
    nstreams=None,
    **kwargs,
):
    """
    Sample `size` indices for each row of the probability matrix `p`.

    Uniform random numbers are drawn from this stream and handed to
    `multinomial.ChoiceFromUniform`, whose output (the sampled indices)
    is returned. Sampled values are between 0 and `p.shape[1]-1`.
    Only sampling without replacement is implemented for now.

    Parameters
    ----------
    size: integer or integer tensor (default 1)
        The number of samples per row of `p`. The tests in this code
        base sample up to `p.shape[1]` elements and expect a
        ``ValueError`` from the op beyond that.
    a: int or None (default None)
        Must be None for now; anything else raises ``TypeError``.
        It is kept so the signature resembles other random-stream
        interfaces.
    replace: bool (default True)
        Whether to sample with replacement. Only ``replace=False`` is
        implemented, so the default value raises
        ``NotImplementedError``; callers must pass ``replace=False``
        explicitly.
    p: 2d numpy array or aesara tensor
        The probabilities of the distribution, one distribution per
        row, corresponding to values 0 to `p.shape[1]-1`. Required.
    ndim: None
        Only present for signature compatibility; any value other than
        None raises ``ValueError``.
    dtype: str (default "int64")
        Forwarded to `ChoiceFromUniform` as its `odtype`.
    nstreams, **kwargs
        Forwarded unchanged to `self.uniform`.

    Raises
    ------
    NotImplementedError
        If `replace` is true, or if `p` is not 2-dimensional.
    TypeError
        If `a` is not None, or if `p` is None.
    ValueError
        If `ndim` is not None.

    Example : p = [[.98, .01, .01], [.01, .49, .50]] and size=1 will
    probably result in [[0],[2]]. When setting size=2, this will
    probably result in [[0,1],[2,1]].

    Notes
    -----
    -This method does no value checking on `p`, i.e. there is no check
     that the elements are non-negative, less than 1, or sum to 1.
     (The original documentation notes that passing
     pvals = [[-2., 2.]] results in sampling [[0, 0]].)
    """
    # Argument validation; the order of these checks is significant
    # because it decides which error a caller sees first.
    if replace:
        raise NotImplementedError(
            "MRG_RandomStream.choice only works without replacement for now."
        )
    if a is not None:
        raise TypeError(
            "For now, a has to be None in MRG_RandomStream.choice. "
            "Sampled values are between 0 and p.shape[1]-1"
        )
    if p is None:
        raise TypeError(
            "For now, p has to be specified in MRG_RandomStream.choice."
        )

    probs = undefined_grad(as_tensor_variable(p))

    if ndim is not None:
        raise ValueError(
            "ndim argument to MRG_RandomStream.choice is not used."
        )
    if probs.ndim != 2:
        raise NotImplementedError(
            "MRG_RandomStream.choice is only implemented for p.ndim = 2"
        )

    # One uniform number per requested sample per row of `p`.
    n_uniform = probs[:, 0].shape * size
    unis = self.uniform(size=n_uniform, ndim=1, nstreams=nstreams, **kwargs)
    sampler = multinomial.ChoiceFromUniform(odtype=dtype)
    return sampler(probs, unis, as_tensor_variable(size))
def test_select_distinct(self):
    """Check that ChoiceFromUniform always selects distinct elements.

    For several sample counts (up to and including the full number of
    outcomes) the result must contain exactly the requested number of
    indices, all of them different, and all within
    ``range(n_elements)``. For the sample counts that have a recorded
    reference result (5, 10 and 50 under seed 12345), the output must
    also match that reference exactly.
    """
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)
    f = function([p, u, n], m, allow_input_downcast=True)

    n_elements = 1000
    all_indices = range(n_elements)
    np.random.seed(12345)
    # Reference outputs, matched to a result by shape.
    expected = [
        np.asarray([[931, 318, 185, 209, 559]]),
        np.asarray([[477, 887, 2, 717, 333, 665, 159, 559, 348, 136]]),
        np.asarray([[
            546, 28, 79, 665, 295, 779, 433, 531, 411, 716,
            244, 234, 70, 88, 612, 639, 383, 335, 451, 100,
            175, 492, 848, 771, 559, 214, 568, 596, 370, 486,
            855, 925, 138, 300, 528, 507, 730, 199, 882, 357,
            58, 195, 705, 900, 66, 468, 513, 410, 816, 672,
        ]]),
    ]
    for i in [5, 10, 50, 100, 500, n_elements]:
        uni = np.random.rand(i).astype(config.floatX)
        pvals = np.random.randint(1, 100, (1, n_elements))
        pvals = pvals.astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
        for reference in expected:
            if reference.shape == res.shape:
                assert (reference == res).all()
        res = np.squeeze(res)
        assert len(res) == i
        unique_res = np.unique(res)
        # The actual distinctness check: a repeated index would shrink
        # the set of unique values below the number requested.
        assert len(unique_res) == i, res
        assert np.all(np.in1d(unique_res, all_indices)), res