def transform_params(*inputs):
    """Pull the lower and upper bounds out of ``inputs`` as tensor variables.

    ``inputs`` must contain exactly five entries; the first three are
    ignored. A bound that is ``None`` is returned unchanged, any other
    bound is converted with ``at.as_tensor_variable``.

    Returns
    -------
    tuple
        ``(lower, upper)``.
    """
    _first, _second, _third, lower, upper = inputs
    # Convert in order (lower first, then upper), leaving missing bounds alone.
    return tuple(
        None if bound is None else at.as_tensor_variable(bound)
        for bound in (lower, upper)
    )
def b(self, bval):
    """Return ``bval`` as a tensor variable built from an array of ``self.dtype``."""
    as_array = np.asarray(bval, dtype=self.dtype)
    return aet.as_tensor_variable(as_array)
def logp(rv, value):
    """Build the log-probability graph of the random variable ``rv``.

    ``value`` is first converted to a tensor variable with the dtype of
    ``rv`` and then handed to ``logp_aeppl``.
    """
    value_tensor = at.as_tensor_variable(value, dtype=rv.dtype)
    return logp_aeppl(rv, value_tensor)
def choice(
    self,
    size=1,
    a=None,
    replace=True,
    p=None,
    ndim=None,
    dtype="int64",
    nstreams=None,
    **kwargs,
):
    """
    Sample `size` times from a multinomial distribution defined by
    probabilities `p`, and return the indices of the sampled elements.
    Sampled values are between 0 and `p.shape[1]-1`.

    Only sampling without replacement is implemented for now, so callers
    have to pass ``replace=False`` explicitly: the default ``replace=True``
    raises ``NotImplementedError``.

    Parameters
    ----------
    size: integer or integer tensor (default 1)
        The number of samples. It should be between 1 and `p.shape[1]-1`.
    a: int or None (default None)
        For now, `a` should be None. This function will sample values
        between 0 and `p.shape[1]-1`. When ``a != None`` is implemented,
        if `a` is a scalar, the samples will be drawn from the range
        0,...,a-1.
    replace: bool (default True)
        Whether the sample is with or without replacement.
        Only replace=False is implemented for now.
    p: 2d numpy array or aesara tensor
        The probabilities of the distribution, corresponding to values
        0 to `p.shape[1]-1`.
    ndim: None
        Must be left as None; it only exists to keep the same signature as
        uniform, binomial, normal, etc.
    dtype: str (default "int64")
        Passed as ``odtype`` to ``multinomial.ChoiceFromUniform``.
    nstreams
        Forwarded to ``self.uniform``.
    kwargs
        Extra keyword arguments forwarded to ``self.uniform``.

    Raises
    ------
    NotImplementedError
        If `replace` is truthy, or if `p` is not 2-dimensional.
    TypeError
        If `a` is not None, or if `p` is None.
    ValueError
        If `ndim` is not None.

    Example : p = [[.98, .01, .01], [.01, .49, .50]] and size=1 will
    probably result in [[0],[2]]. When setting size=2, this
    will probably result in [[0,1],[2,1]].

    Notes
    -----
    -`ndim` is only there to keep the same signature as other
    uniform, binomial, normal, etc.

    -Does not do any value checking on pvals, i.e. there is no
    check that the elements are non-negative, less than 1, or
    sum to 1. passing pvals = [[-2., 2.]] will result in
    sampling [[0, 0]]

    -Only replace=False is implemented for now.

    """
    if replace:
        raise NotImplementedError(
            "MRG_RandomStream.choice only works without replacement "
            "for now.")

    if a is not None:
        raise TypeError("For now, a has to be None in "
                        "MRG_RandomStream.choice. Sampled values are "
                        "between 0 and p.shape[1]-1")

    if p is None:
        raise TypeError("For now, p has to be specified in "
                        "MRG_RandomStream.choice.")
    p = as_tensor_variable(p)
    p = undefined_grad(p)

    if ndim is not None:
        raise ValueError("ndim argument to "
                         "MRG_RandomStream.choice "
                         "is not used.")

    if p.ndim != 2:
        raise NotImplementedError(
            "MRG_RandomStream.choice is only implemented for p.ndim = 2")

    # Request `size` uniform draws for every row of `p`, as one flat vector.
    shape = p[:, 0].shape * size
    unis = self.uniform(size=shape, ndim=1, nstreams=nstreams, **kwargs)
    op = multinomial.ChoiceFromUniform(odtype=dtype)
    return op(p, unis, as_tensor_variable(size))
def new(cls, rstate, ndim, dtype, size):
    """Instantiate the op for the requested output type and apply it.

    When ``ndim`` is ``None`` the number of output dimensions is taken
    from the length of ``size`` via ``get_vector_length``. The output type
    is a ``TensorType`` of ``dtype`` with no broadcastable dimension.
    """
    size_var = as_tensor_variable(size)
    n_dims = get_vector_length(size_var) if ndim is None else ndim
    out_type = TensorType(dtype, (False, ) * n_dims)
    return cls(out_type)(rstate, size_var)
def test_test_value_constant():
    """The test value of a tensor built from a 5x5 zero array is all zeros."""
    constant = aet.as_tensor_variable(np.zeros((5, 5)))
    test_value = op.get_test_value(constant)
    expected = np.zeros((5, 5))
    assert np.all(test_value == expected)
def safe_new(x, tag="", dtype=None):
    """
    Internal function that constructs a new variable from x with the same
    type, but with a different name (old name + tag). This function is used
    by gradient, or the R-op to construct new variables for the inputs of
    the inner graph such that there is no interference between the original
    graph and the newly constructed graph.

    Parameters
    ----------
    x
        The variable (or value convertible to a tensor variable) to copy.
    tag : str
        Suffix appended to ``x.name`` to build the new variable's name.
        When `x` has no name, the new variable has no name either.
    dtype
        Optional dtype. When given and different from the dtype of `x`,
        the new variable is created with this dtype.

    Returns
    -------
    A constant for constant inputs, otherwise a fresh variable of the
    (possibly cast) type of `x`.
    """
    if hasattr(x, "name") and x.name is not None:
        nw_name = x.name + tag
    else:
        nw_name = None

    if isinstance(x, aesara.Constant):
        if dtype and x.dtype != dtype:
            # Rebuild the constant with the cast type. Note that it keeps
            # the original name (`x.name`); `tag` is not applied here.
            casted_x = x.astype(dtype)
            nwx = x.__class__(casted_x.type, x.data, x.name)
            nwx.tag = copy.copy(x.tag)
            return nwx
        else:
            return x.clone()
    # Note, as_tensor_variable will convert the Scalar into a
    # TensorScalar that will require a ScalarFromTensor op,
    # making the pushout optimization fail
    elif isinstance(x, scalar.ScalarVariable):
        if dtype:
            nw_x = scalar.get_scalar_type(dtype=dtype)()
        else:
            nw_x = x.type()
        nw_x.name = nw_name
        if aesara.config.compute_test_value != "off":
            # Copy test value, cast it if necessary
            try:
                x_test_value = gof.op.get_test_value(x)
            except TestValueError:
                pass
            else:
                # This clause is executed if no exception was raised
                nw_x.tag.test_value = nw_x.type.filter(x_test_value)
        return nw_x
    else:
        try:
            x = tensor.as_tensor_variable(x)
        except TypeError:
            # This could happen for example for random states
            pass

    # Cast x if needed. If x has a test value, this will also cast it.
    if dtype and x.dtype != dtype:
        x = x.astype(dtype)

    nw_x = x.type()
    nw_x.name = nw_name
    # Preserve test values so that the 'compute_test_value' option can be used.
    # The test value is deep-copied to ensure there can be no interactions
    # between test values, due to inplace operations for instance. This may
    # not be the most efficient memory-wise, though.
    if aesara.config.compute_test_value != "off":
        try:
            nw_x.tag.test_value = copy.deepcopy(gof.op.get_test_value(x))
        except TestValueError:
            pass

    return nw_x
def forward(self, x):
    """Return ``x`` converted to a tensor variable, with no other change."""
    as_tensor = aet.as_tensor_variable(x)
    return as_tensor
def make_node(self, x):
    """Build the ``Apply`` node for a 2-dimensional input ``x``.

    Asserts that SciPy was imported and that the converted input has two
    dimensions. The single output has the same type as the input.
    """
    assert imported_scipy, "Scipy not available. Scipy is needed for the Cholesky op"
    matrix = as_tensor_variable(x)
    assert matrix.ndim == 2
    outputs = [matrix.type()]
    return Apply(self, [matrix], outputs)
def __init__(self, a):
    """Store ``a`` on the instance as a tensor variable."""
    a_tensor = aet.as_tensor_variable(a)
    self.a = a_tensor
def __init__(self, b):
    """Store ``b`` on the instance as a tensor variable."""
    b_tensor = aet.as_tensor_variable(b)
    self.b = b_tensor
def validate(
    self,
    image_shape,
    filter_shape,
    border_mode="valid",
    subsample=(1, 1),
    input=None,
    filters=None,
    verify_grad=True,
    non_contiguous=False,
    filter_dilation=(1, 1),
):
    """
    Compare the output of ``corr.CorrMM`` with a NumPy reference loop.

    :param image_shape: The constant shape info passed to corrMM.
    :param filter_shape: The constant shape info passed to corrMM.
    :param border_mode: "full", "valid", "half", a tuple of two paddings
        or a single int padding; anything else raises NotImplementedError
        in the reference computation.
    :param subsample: Step between output positions along the two
        trailing image axes.
    :param input: Symbolic image variable; defaults to ``self.input``.
    :param filters: Symbolic filter variable; defaults to ``self.filters``.
    :param verify_grad: When true, also run ``utt.verify_grad`` on the op.
    :param non_contiguous: When true, feed non-contiguous arrays (built by
        transpose / copy / transpose) to the compiled function.
    :param filter_dilation: Dilation factors applied to the filters.
    """
    if not aesara.config.cxx:
        pytest.skip("Need cxx to test conv2d")

    # Turn the (possibly symbolic) shape entries into plain constants.
    N_image_shape = [
        at.get_scalar_constant_value(at.as_tensor_variable(x))
        for x in image_shape
    ]
    N_filter_shape = [
        at.get_scalar_constant_value(at.as_tensor_variable(x))
        for x in filter_shape
    ]

    if input is None:
        input = self.input
    if filters is None:
        filters = self.filters

    # AESARA IMPLEMENTATION

    # we create a symbolic function so that verify_grad can work
    def sym_CorrMM(input, filters):
        # define aesara graph and function
        input.name = "input"
        filters.name = "filters"
        rval = corr.CorrMM(border_mode, subsample, filter_dilation)(input, filters)
        rval.name = "corr_output"
        return rval

    output = sym_CorrMM(input, filters)
    output.name = f"CorrMM()({input.name},{filters.name})"
    aesara_corr = aesara.function([input, filters], output, mode=self.mode)

    # initialize input and compute result
    image_data = np.random.random(N_image_shape).astype(self.dtype)
    filter_data = np.random.random(N_filter_shape).astype(self.dtype)
    if non_contiguous:
        # Transpose, copy, transpose back: same values and shape, but the
        # resulting arrays are no longer contiguous (asserted below).
        image_data = np.transpose(image_data, axes=(0, 1, 3, 2))
        image_data = image_data.copy()
        image_data = np.transpose(image_data, axes=(0, 1, 3, 2))
        filter_data = np.transpose(filter_data, axes=(0, 1, 3, 2))
        filter_data = filter_data.copy()
        filter_data = np.transpose(filter_data, axes=(0, 1, 3, 2))
        assert not image_data.flags["CONTIGUOUS"]
        assert not filter_data.flags["CONTIGUOUS"]

    aesara_output = aesara_corr(image_data, filter_data)

    # REFERENCE IMPLEMENTATION
    # Testing correlation, not convolution. Reverse filters.
    filter_data_corr = np.array(filter_data[:, :, ::-1, ::-1], copy=True, order="C")
    orig_image_data = image_data
    img_shape2d = np.array(N_image_shape[-2:])
    fil_shape2d = np.array(N_filter_shape[-2:])
    dil_shape2d = np.array(filter_dilation)
    # Spatial extent covered by a filter once dilation is applied.
    dil_fil_shape2d = (fil_shape2d - 1) * dil_shape2d + 1
    subsample2d = np.array(subsample)
    if border_mode == "full":
        padHW = dil_fil_shape2d - 1
    elif border_mode == "valid":
        padHW = np.array([0, 0])
    elif border_mode == "half":
        padHW = np.floor(dil_fil_shape2d / 2).astype("int32")
    elif isinstance(border_mode, tuple):
        padHW = np.array(border_mode)
    elif isinstance(border_mode, int):
        padHW = np.array([border_mode, border_mode])
    else:
        raise NotImplementedError(f"Unsupported border_mode {border_mode}")
    out_shape2d = (np.floor(
        (img_shape2d + 2 * (padHW) - dil_fil_shape2d) / subsample2d) + 1)
    # avoid numpy deprecation
    out_shape2d = out_shape2d.astype("int32")
    out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape2d)
    ref_output = np.zeros(out_shape)

    # loop over output feature maps
    ref_output.fill(0)
    # Zero-padded copy of the image; the original data sits in the middle.
    image_data2 = np.zeros((
        N_image_shape[0],
        N_image_shape[1],
        N_image_shape[2] + 2 * padHW[0],
        N_image_shape[3] + 2 * padHW[1],
    ))
    image_data2[:, :,
                padHW[0]:padHW[0] + N_image_shape[2],
                padHW[1]:padHW[1] + N_image_shape[3],
                ] = image_data
    image_data = image_data2
    N_image_shape = image_data.shape
    for bb in range(N_image_shape[0]):
        for nn in range(N_filter_shape[0]):
            for im0 in range(N_image_shape[1]):
                filter2d = filter_data_corr[nn, im0, :, :]
                image2d = image_data[bb, im0, :, :]
                for row in range(ref_output.shape[2]):
                    irow = row * subsample[0]  # image row
                    for col in range(ref_output.shape[3]):
                        icol = col * subsample[1]  # image col
                        # `filter2d` was already reversed above; it is
                        # reversed once more here before the product.
                        ref_output[bb, nn, row, col] += (
                            image2d[
                                irow:irow + dil_fil_shape2d[0]:filter_dilation[0],
                                icol:icol + dil_fil_shape2d[1]:filter_dilation[1],
                            ] * filter2d[::-1, ::-1]).sum()

    utt.assert_allclose(ref_output, aesara_output)

    # TEST GRADIENT
    if verify_grad:
        # Uses the unpadded image data saved before the reference padding.
        utt.verify_grad(sym_CorrMM, [orig_image_data, filter_data], mode=self.mode)
def make_node(self, x, i0, i1, amt):
    """Build the ``Apply`` node for inputs ``x``, ``i0``, ``i1`` and ``amt``.

    Only ``i0`` and ``i1`` are converted to tensor variables; ``x`` and
    ``amt`` are used as given. The single output has the type of ``x``.
    """
    node_inputs = [
        x,
        at.as_tensor_variable(i0),
        at.as_tensor_variable(i1),
        amt,
    ]
    node_outputs = [x.type()]
    return Apply(self, node_inputs, node_outputs)
def __init__(self, input_dim, period, tau=4, active_dims=None):
    """Initialise the parent class, then store half the period and ``tau``.

    ``self.c`` holds ``period / 2`` as a tensor variable; ``tau`` is
    stored unchanged.
    """
    super().__init__(input_dim, active_dims)
    half_period = period / 2
    self.c = at.as_tensor_variable(half_period)
    self.tau = tau
def joint_logpt(
    var: Union[TensorVariable, List[TensorVariable]],
    rv_values: Optional[Union[TensorVariable, Dict[TensorVariable,
                                                   TensorVariable]]] = None,
    *,
    jacobian: bool = True,
    scaling: bool = True,
    transformed: bool = True,
    sum: bool = True,
    **kwargs,
) -> Union[TensorVariable, List[TensorVariable]]:
    """Create a measure-space (i.e. log-likelihood) graph for a random variable
    or a list of random variables at a given point.

    The input `var` determines which log-likelihood graph is used and
    `rv_value` is that graph's input parameter.  For example, if `var` is
    the output of a ``NormalRV`` ``Op``, then the output is a graph of the
    density function for `var` set to the value `rv_value`.

    Parameters
    ==========
    var
        The `RandomVariable` output that determines the log-likelihood graph.
        Can also be a list of variables. The final log-likelihood graph will
        be the sum total of all individual log-likelihood graphs of variables
        in the list.
    rv_values
        A variable, or ``dict`` of variables, that represents the value of
        `var` in its log-likelihood.  If no `rv_value` is provided,
        ``var.tag.observations`` and then ``var.tag.value_var`` will be
        checked and, when available, used.
    jacobian
        Whether or not to include the Jacobian term.
    scaling
        A scaling term to apply to the generated log-likelihood graph.
    transformed
        Apply transforms.
    sum
        Sum the log-likelihood or return each term as a separate list item.
    kwargs
        Forwarded to ``factorized_joint_logprob``.

    Raises
    ======
    ValueError
        If `rv_values` is omitted and a variable has no tagged value
        variable, or if a single non-mapping `rv_values` is given together
        with more than one variable.

    """
    # TODO: In future when we drop support for tag.value_var most of the following
    # logic can be removed and logpt can just be a wrapper function that calls aeppl's
    # joint_logprob directly.

    # If var is not a list make it one.
    if not isinstance(var, (list, tuple)):
        var = [var]

    # If logpt isn't provided values it is assumed that the tagged value var or
    # observation is the value variable for that particular RV.
    if rv_values is None:
        rv_values = {}
        for rv in var:
            # `observations` takes precedence over `value_var`.
            value_var = getattr(rv.tag, "observations",
                                getattr(rv.tag, "value_var", None))
            if value_var is None:
                raise ValueError(f"No value variable found for var {rv}")
            rv_values[rv] = value_var
    # Else we assume we were given a single rv and respective value
    elif not isinstance(rv_values, Mapping):
        if len(var) == 1:
            rv_values = {
                var[0]: at.as_tensor_variable(rv_values).astype(var[0].type)
            }
        else:
            raise ValueError(
                "rv_values must be a dict if more than one var is requested")

    if scaling:
        # One scaling factor per value variable, keyed by the value variable.
        rv_scalings = {}
        for rv, value_var in rv_values.items():
            rv_scalings[value_var] = _get_scaling(
                getattr(rv.tag, "total_size", None), value_var.shape,
                value_var.ndim)

    # Aeppl needs all rv-values pairs, not just that of the requested var.
    # Hence we iterate through the graph to collect them.
    tmp_rvs_to_values = rv_values.copy()
    for node in io_toposort(graph_inputs(var), var):
        try:
            curr_vars = [node.default_output()]
        except ValueError:
            curr_vars = node.outputs
        for curr_var in curr_vars:
            # Values supplied (or already collected) are never overwritten.
            if curr_var in tmp_rvs_to_values:
                continue
            # Check if variable has a value variable
            value_var = getattr(curr_var.tag, "observations",
                                getattr(curr_var.tag, "value_var", None))
            if value_var is not None:
                tmp_rvs_to_values[curr_var] = value_var

    # After collecting all necessary rvs and values, we check for any value transforms
    transform_map = {}
    if transformed:
        for rv, value_var in tmp_rvs_to_values.items():
            if hasattr(value_var.tag, "transform"):
                transform_map[value_var] = value_var.tag.transform
            # If the provided value_variable does not have transform information, we
            # check if the original `rv.tag.value_var` does.
            # TODO: This logic should be replaced by an explicit dict of
            #  `{value_var: transform}` similar to `rv_values`.
            else:
                original_value_var = getattr(rv.tag, "value_var", None)
                if original_value_var is not None and hasattr(
                        original_value_var.tag, "transform"):
                    transform_map[value_var] = original_value_var.tag.transform

    transform_opt = TransformValuesOpt(transform_map)
    temp_logp_var_dict = factorized_joint_logprob(tmp_rvs_to_values,
                                                  extra_rewrites=transform_opt,
                                                  use_jacobian=jacobian,
                                                  **kwargs)

    # aeppl returns the logpt for every single value term we provided to it. This includes
    # the extra values we plugged in above, so we filter those we actually wanted in the
    # same order they were given in.
    logp_var_dict = {}
    for value_var in rv_values.values():
        logp_var_dict[value_var] = temp_logp_var_dict[value_var]

    if scaling:
        for value_var in logp_var_dict.keys():
            if value_var in rv_scalings:
                logp_var_dict[value_var] *= rv_scalings[value_var]

    if sum:
        # Reduce every factor to a scalar, then add the scalars together.
        logp_var = at.sum(
            [at.sum(factor) for factor in logp_var_dict.values()])
    else:
        logp_var = list(logp_var_dict.values())

    return logp_var
def make_node(self, x):
    """Build an ``Apply`` node whose single output has the type of the input."""
    tensor_in = at.as_tensor_variable(x)
    node_outputs = [tensor_in.type()]
    return Apply(self, [tensor_in], node_outputs)
def safe_new(x: Variable,
             tag: str = "",
             dtype: Optional[Union[str, np.dtype]] = None) -> Variable:
    """Clone variables.

    Internal function that constructs a new variable from `x` with the same
    type, but with a different name (old name + tag). This function is used
    by `gradient`, or the R-op to construct new variables for the inputs of
    the inner graph such that there is no interference between the original
    graph and the newly constructed graph.

    Parameters
    ----------
    x
        The variable to copy.
    tag
        Suffix appended to ``x.name`` to build the new variable's name.
        When `x` has no name, the new variable has no name either.
    dtype
        Optional dtype. When given and different from the dtype of `x`,
        the new variable is created with this dtype.

    Returns
    -------
    For a constant: `x` itself, or a new constant of the cast type when a
    different `dtype` is requested. Otherwise a fresh variable of the
    (possibly cast) type of `x`.

    """
    if hasattr(x, "name") and x.name is not None:
        nw_name = x.name + tag
    else:
        nw_name = None

    if isinstance(x, Constant):
        # TODO: Do something better about this
        assert isinstance(x.type, HasDataType)

        if dtype and x.type.dtype != dtype:
            # Rebuild the constant with the cast type. Note that it keeps
            # the original name (`x.name`); `tag` is not applied here.
            casted_x = cast(x, dtype)
            nwx = type(x)(casted_x.type, x.data, x.name)
            nwx.tag = copy.copy(x.tag)
            return nwx
        else:
            # The constant is returned as-is (not cloned).
            return x
    # Note, `as_tensor_variable` will convert the `ScalarType` into a
    # `TensorScalar` that will require a `ScalarFromTensor` `Op`, making the
    # push-out optimization fail
    elif isinstance(x, aes.ScalarVariable):
        if dtype:
            nw_x = aes.get_scalar_type(dtype=dtype)()
        else:
            nw_x = x.type()
        nw_x.name = nw_name
        if config.compute_test_value != "off":
            # Copy test value, cast it if necessary
            try:
                x_test_value = get_test_value(x)
            except TestValueError:
                pass
            else:
                # This clause is executed if no exception was raised
                nw_x.tag.test_value = nw_x.type.filter(x_test_value)
        return nw_x
    else:
        try:
            x = at.as_tensor_variable(x)
        except TypeError:
            # This could happen for example for random states
            pass

    # Cast `x` if needed. If `x` has a test value, this will also cast it.
    if dtype:
        # TODO: Do something better about this
        assert isinstance(x.type, HasDataType)

        if x.type.dtype != dtype:
            x = cast(x, dtype)

    nw_x = x.type()
    nw_x.name = nw_name
    # Preserve test values so that the `compute_test_value` option can be used.
    # The test value is deep-copied to ensure there can be no interactions
    # between test values, due to inplace operations for instance. This may
    # not be the most efficient memory-wise, though.
    if config.compute_test_value != "off":
        try:
            nw_x.tag.test_value = copy.deepcopy(get_test_value(x))
        except TestValueError:
            pass

    return nw_x
], ) def test_beta_samples(a, b, size): rv_numpy_tester(beta, a, b, size=size) M_at = iscalar("M") M_at.tag.test_value = 3 sd_at = scalar("sd") sd_at.tag.test_value = np.array(1.0, dtype=config.floatX) @pytest.mark.parametrize( "M, sd, size", [ (at.as_tensor_variable(np.array(1.0, dtype=config.floatX)), sd_at, ()), ( at.as_tensor_variable(np.array(1.0, dtype=config.floatX)), sd_at, (M_at, ), ), ( at.as_tensor_variable(np.array(1.0, dtype=config.floatX)), sd_at, (2, M_at), ), (at.zeros((M_at, )), sd_at, ()), (at.zeros((M_at, )), sd_at, (M_at, )), (at.zeros((M_at, )), sd_at, (2, M_at)), (at.zeros((M_at, )), at.ones((M_at, )), ()), (at.zeros((M_at, )), at.ones((M_at, )), (2, M_at)),
def __init__(self, condition):
    """Store ``condition`` as a tensor variable and assert it is 0-dimensional."""
    cond = tensor.as_tensor_variable(condition)
    self.condition = cond
    assert self.condition.ndim == 0
def test_neibs_wrap_centered_step_manual(self):
    """Check ``images2neibs`` in "wrap_centered" mode against hand-written values.

    Every case uses 5x5 trailing image dimensions filled with consecutive
    integers, so each leading-index slice equals the first one shifted by
    a multiple of 25. The expected tables below describe the neighbourhoods
    of the first slice only; cases whose expected value is ``None`` only
    check that the graph compiles, runs and contains ``self.op``.
    """
    expected1 = [
        [24, 20, 21, 4, 0, 1, 9, 5, 6],
        [21, 22, 23, 1, 2, 3, 6, 7, 8],
        [23, 24, 20, 3, 4, 0, 8, 9, 5],
        [9, 5, 6, 14, 10, 11, 19, 15, 16],
        [6, 7, 8, 11, 12, 13, 16, 17, 18],
        [8, 9, 5, 13, 14, 10, 18, 19, 15],
        [19, 15, 16, 24, 20, 21, 4, 0, 1],
        [16, 17, 18, 21, 22, 23, 1, 2, 3],
        [18, 19, 15, 23, 24, 20, 3, 4, 0],
    ]
    expected2 = [
        [24, 20, 21, 4, 0, 1, 9, 5, 6],
        [22, 23, 24, 2, 3, 4, 7, 8, 9],
        [14, 10, 11, 19, 15, 16, 24, 20, 21],
        [12, 13, 14, 17, 18, 19, 22, 23, 24],
    ]
    expected3 = [
        [19, 15, 16, 24, 20, 21, 4, 0, 1, 9, 5, 6, 14, 10, 11],
        [17, 18, 19, 22, 23, 24, 2, 3, 4, 7, 8, 9, 12, 13, 14],
        [9, 5, 6, 14, 10, 11, 19, 15, 16, 24, 20, 21, 4, 0, 1],
        [7, 8, 9, 12, 13, 14, 17, 18, 19, 22, 23, 24, 2, 3, 4],
    ]
    expected4 = [
        [23, 24, 20, 21, 22, 3, 4, 0, 1, 2, 8, 9, 5, 6, 7],
        [21, 22, 23, 24, 20, 1, 2, 3, 4, 0, 6, 7, 8, 9, 5],
        [13, 14, 10, 11, 12, 18, 19, 15, 16, 17, 23, 24, 20, 21, 22],
        [11, 12, 13, 14, 10, 16, 17, 18, 19, 15, 21, 22, 23, 24, 20],
    ]
    expected5 = [
        [24, 20, 21, 4, 0, 1, 9, 5, 6],
        [22, 23, 24, 2, 3, 4, 7, 8, 9],
        [9, 5, 6, 14, 10, 11, 19, 15, 16],
        [7, 8, 9, 12, 13, 14, 17, 18, 19],
        [19, 15, 16, 24, 20, 21, 4, 0, 1],
        [17, 18, 19, 22, 23, 24, 2, 3, 4],
    ]
    expected6 = [
        [24, 20, 21, 4, 0, 1, 9, 5, 6],
        [21, 22, 23, 1, 2, 3, 6, 7, 8],
        [23, 24, 20, 3, 4, 0, 8, 9, 5],
        [14, 10, 11, 19, 15, 16, 24, 20, 21],
        [11, 12, 13, 16, 17, 18, 21, 22, 23],
        [13, 14, 10, 18, 19, 15, 23, 24, 20],
    ]

    # TODO test discontinuous image

    # Each case is (image shape, neighbourhood shape, step, expected table).
    for shp_idx, (shape, neib_shape, neib_step, expected) in enumerate([
        [(7, 8, 5, 5), (3, 3), (2, 2), expected1],
        [(7, 8, 5, 5), (3, 3), (3, 3), expected2],
        [(7, 8, 5, 5), (5, 3), (3, 3), expected3],
        [(7, 8, 5, 5), (3, 5), (3, 3), expected4],
        [(80, 90, 5, 5), (3, 3), (2, 3), expected5],
        [(1025, 9, 5, 5), (3, 3), (3, 2), expected6],
        [(1, 1, 5, 1035), (3, 3), (3, 3), None],
        [(1, 1, 1045, 5), (3, 3), (3, 3), None],
    ]):

        for dtype in self.dtypes:

            images = shared(
                np.asarray(np.arange(np.prod(shape)).reshape(shape),
                           dtype=dtype))
            # NOTE: these rebind the outer loop variables, so from the
            # second dtype on they are already tensors / arrays.
            neib_shape = tt.as_tensor_variable(neib_shape)
            neib_step = tt.as_tensor_variable(neib_step)
            expected = np.asarray(expected)

            f = function(
                [],
                images2neibs(images, neib_shape, neib_step,
                             mode="wrap_centered"),
                mode=self.mode,
            )
            neibs = f()

            # `np.asarray(None)` has size 1, so the `None` cases skip the
            # value comparison.
            if expected.size > 1:
                for i in range(shape[0] * shape[1]):
                    assert np.allclose(
                        neibs[i * expected.shape[0]:(i + 1) *
                              expected.shape[0], :],
                        expected + 25 * i,
                    ), "wrap_centered"

            assert self.op in [
                type(node.op) for node in f.maker.fgraph.toposort()
            ]
def with_mode(
    self,
    mode,
    scalar_op=aes.add,
    dtype="floatX",
    pre_scalar_op=None,
    test_nan=False,
    tensor_op=None,
):
    """Check a reduction against NumPy for every case in ``self.cases``.

    Each case is a pair ``(xsh, tosum)``: an input shape and the axes to
    reduce. For every case the compiled reduction is compared with the
    equivalent NumPy reduction, then the compiled output shape is compared
    with the NumPy result's shape.

    Parameters
    ----------
    mode
        Compilation mode passed to ``aesara.function``.
    scalar_op
        Scalar operation used for the reduction when `tensor_op` is None.
    dtype
        Input dtype; the string "floatX" is replaced by
        ``aesara.config.floatX``.
    pre_scalar_op
        Optional scalar operation passed to the op as ``pre_scalar_op``;
        the NumPy reference applies it elementwise before reducing.
    test_nan
        When true, a NaN is written into non-empty inputs and results are
        compared with ``self.type.values_eq``.
    tensor_op
        Optional tensor-level function called as ``tensor_op(x, axis=...)``
        instead of ``self.op``.
    """
    for xsh, tosum in self.cases:
        if dtype == "floatX":
            dtype = aesara.config.floatX
        # A dimension is broadcastable exactly when its length is 1.
        x = self.type(dtype, [(entry == 1) for entry in xsh])("x")
        d = {}
        if pre_scalar_op is not None:
            d = {"pre_scalar_op": pre_scalar_op}
        if tensor_op is None:
            e = as_tensor_variable(self.op(scalar_op, axis=tosum, **d)(x))
        else:
            e = as_tensor_variable(tensor_op(x, axis=tosum, **d))

        # `None` means "reduce over every axis" for the NumPy reference.
        if tosum is None:
            tosum = list(range(len(xsh)))

        f = aesara.function([x], e, mode=mode, on_unused_input="ignore")
        xv = np.asarray(np.random.random(xsh))

        if dtype not in discrete_dtypes:
            xv = np.asarray(xv, dtype=dtype)
        else:
            xv = np.asarray(xv < 0.5, dtype=dtype)

        if test_nan and xv.size > 0:
            if len(xsh) > 0:
                xv = xv.flatten()
                xv[0] = np.nan
                xv = xv.reshape(*xsh)
            else:
                xv = np.asarray(np.nan, dtype=dtype)
        # `zv` accumulates the NumPy reference result.
        zv = xv
        if pre_scalar_op is not None:
            zv = Elemwise(scalar_op=pre_scalar_op)(x).eval({x: xv})

        if len(tosum) > 1 and any(a < 0 for a in tosum):
            # In that case, we need to use the good order of axis
            # in the reduction.
            axis2 = []
            for a in tosum:
                if a < 0:
                    axis2.append(a + len(xsh))
                else:
                    axis2.append(a)
            assert len(axis2) == len(tosum)
            tosum = tuple(axis2)
        # Axes are reduced from the highest to the lowest so that the
        # remaining axis indices stay valid after each reduction.
        if tensor_op == at_all:
            for axis in reversed(sorted(tosum)):
                zv = np.all(zv, axis)
            if len(tosum) == 0:
                zv = zv != 0
        elif tensor_op == at_any:
            for axis in reversed(sorted(tosum)):
                zv = np.any(zv, axis)
            if len(tosum) == 0:
                zv = zv != 0
        elif scalar_op == aes.add:
            for axis in reversed(sorted(tosum)):
                zv = np.add.reduce(zv, axis)
            if dtype == "bool":
                # np.add of a bool upcast, while CAReduce don't
                zv = zv.astype(dtype)
        elif scalar_op == aes.mul:
            for axis in reversed(sorted(tosum)):
                zv = np.multiply.reduce(zv, axis)
        elif scalar_op == aes.scalar_maximum:
            # There is no identity value for the maximum function
            # So we can't support shape of dimensions 0.
            if np.prod(zv.shape) == 0:
                continue
            for axis in reversed(sorted(tosum)):
                zv = np.maximum.reduce(zv, axis)
        elif scalar_op == aes.scalar_minimum:
            # There is no identity value for the minimum function
            # So we can't support shape of dimensions 0.
            if np.prod(zv.shape) == 0:
                continue
            for axis in reversed(sorted(tosum)):
                zv = np.minimum.reduce(zv, axis)
        elif scalar_op == aes.or_:
            for axis in reversed(sorted(tosum)):
                zv = np.bitwise_or.reduce(zv, axis)
        elif scalar_op == aes.and_:
            for axis in reversed(sorted(tosum)):
                zv = reduce_bitwise_and(zv, axis, dtype=dtype)
        elif scalar_op == aes.xor:
            # There is no identity value for the xor function
            # So we can't support shape of dimensions 0.
            if np.prod(zv.shape) == 0:
                continue
            for axis in reversed(sorted(tosum)):
                zv = np.bitwise_xor.reduce(zv, axis)
        else:
            raise Exception(
                f"Test for CAReduce with scalar_op {scalar_op} not implemented"
            )

        if test_nan:
            try:
                assert self.type.values_eq(f(xv), zv), (f(xv), zv)
            except NotImplementedError:
                # GpuCAReduce don't implement all cases when size is 0
                assert xv.size == 0
        else:
            try:
                f_xv = f(xv)
                assert f_xv.shape == zv.shape, (f_xv, zv)
                utt.assert_allclose(zv, f_xv)
            except NotImplementedError:
                # GpuCAReduce don't implement all cases when size is 0
                assert xv.size == 0

        # Second part: check the shape of the output. The graph is rebuilt
        # without `pre_scalar_op`.
        x = self.type(dtype, [(entry == 1) for entry in xsh])("x")
        if tensor_op is None:
            e = self.op(scalar_op, axis=tosum)(x)
        else:
            e = tensor_op(x, axis=tosum)
        if tosum is None:
            tosum = list(range(len(xsh)))
        f = aesara.function([x], e.shape, mode=mode, on_unused_input="ignore")
        # Skip the shape check for max/min on scalar or empty inputs.
        if not (
            scalar_op in [aes.scalar_maximum, aes.scalar_minimum]
            and (xsh == () or np.prod(xsh) == 0)
        ):
            try:
                assert all(f(xv) == zv.shape)
            except NotImplementedError:
                # GpuCAReduce don't implement all cases when size is 0
                assert xv.size == 0
def make_node(self, diag):
    """Build the ``Apply`` node for a matrix input ``diag``.

    The output is a ``tensor3`` of the same dtype as the input.

    Raises
    ------
    TypeError
        If the converted input is not 2-dimensional.
    """
    diag_var = at.as_tensor_variable(diag)
    if diag_var.type.ndim == 2:
        node_outputs = [at.tensor3(dtype=diag_var.dtype)]
        return Apply(self, [diag_var], node_outputs)
    raise TypeError("data argument must be a matrix", diag_var.type)
def normal(
    self,
    size,
    avg=0.0,
    std=1.0,
    ndim=None,
    dtype=None,
    nstreams=None,
    truncate=False,
    **kwargs,
):
    """
    Sample a tensor of values from a normal distribution.

    Samples are produced from uniform draws with a Box-Muller transform,
    then scaled by `std` and shifted by `avg`.

    Parameters
    ----------
    size : int_vector_like
        Array dimensions for the output tensor.
    avg : float_like, optional
        The mean value for the truncated normal to sample from
        (defaults to 0.0).
    std : float_like, optional
        The standard deviation for the truncated normal to sample from
        (defaults to 1.0).
    truncate : bool, optional
        Truncates the normal distribution at 2 standard deviations if True
        (defaults to False). When this flag is set, the standard deviation of
        the result will be less than the one specified.
    ndim : int, optional
        The number of dimensions for the output tensor (defaults to None).
        This argument is necessary if the size argument is ambiguous on the
        number of dimensions.
    dtype : str, optional
        The data-type for the output tensor. If not specified,
        the dtype is inferred from avg and std, but it is at least as
        precise as floatX.
    nstreams : optional
        Forwarded to `uniform`.
    kwargs
        Other keyword arguments for random number generation (see uniform).

    Returns
    -------
    samples : TensorVariable
        An Aesara tensor of samples randomly drawn from a normal distribution.

    """
    size = _check_size(size)
    avg = undefined_grad(as_tensor_variable(avg))
    std = undefined_grad(as_tensor_variable(std))

    if dtype is None:
        dtype = aes.upcast(config.floatX, avg.dtype, std.dtype)

    avg = at.cast(avg, dtype=dtype)
    std = at.cast(std, dtype=dtype)

    # generate even number of uniform samples
    # Do manual constant folding to lower optimizer work.
    if isinstance(size, Constant):
        n_odd_samples = size.prod(dtype="int64")
    else:
        n_odd_samples = prod(size, dtype="int64")
    # Round the requested count up to the next even number.
    n_even_samples = n_odd_samples + n_odd_samples % 2
    uniform = self.uniform(
        (n_even_samples, ),
        low=0.0,
        high=1.0,
        ndim=1,
        dtype=dtype,
        nstreams=nstreams,
        **kwargs,
    )

    # box-muller transform
    u1 = uniform[:n_even_samples // 2]
    u2 = uniform[n_even_samples // 2:]
    r = sqrt(-2.0 * log(u1))
    theta = np.array(2.0 * np.pi, dtype=dtype) * u2
    cos_theta, sin_theta = cos(theta), sin(theta)
    z0 = r * cos_theta
    z1 = r * sin_theta

    if truncate:
        # use valid samples
        to_fix0 = (z0 < -2.0) | (z0 > 2.0)
        to_fix1 = (z1 < -2.0) | (z1 > 2.0)
        z0_valid = z0[at.nonzero(~to_fix0)]
        z1_valid = z1[at.nonzero(~to_fix1)]

        # re-sample invalid samples
        to_fix0 = at.nonzero(to_fix0)[0]
        to_fix1 = at.nonzero(to_fix1)[0]
        n_fix_samples = to_fix0.size + to_fix1.size
        # With u >= exp(-2), the radius sqrt(-2 * log(u)) is at most 2.
        lower = at.constant(1.0 / np.e**2, dtype=dtype)
        u_fix = self.uniform(
            (n_fix_samples, ),
            low=lower,
            high=1.0,
            ndim=1,
            dtype=dtype,
            nstreams=nstreams,
            **kwargs,
        )
        r_fix = sqrt(-2.0 * log(u_fix))
        # Only the radius is redrawn; the original angles are reused.
        z0_fixed = r_fix[:to_fix0.size] * cos_theta[to_fix0]
        z1_fixed = r_fix[to_fix0.size:] * sin_theta[to_fix1]

        # pack everything together to a useful result
        norm_samples = at.join(0, z0_valid, z0_fixed, z1_valid, z1_fixed)
    else:
        norm_samples = at.join(0, z0, z1)
    # Drop the extra sample when the requested count was odd.
    if isinstance(n_odd_samples, Variable):
        samples = norm_samples[:n_odd_samples]
    elif n_odd_samples % 2 == 1:
        samples = norm_samples[:-1]
    else:
        samples = norm_samples
    samples = reshape(samples, newshape=size, ndim=ndim)
    samples *= std
    samples += avg

    return samples
def make_node(self, a):
    """Build an ``Apply`` node whose single output has the type of ``a``."""
    tensor_a = aet.as_tensor_variable(a)
    node_outputs = [tensor_a.type()]
    return Apply(self, [tensor_a], node_outputs)
def uniform(self,
            size,
            low=0.0,
            high=1.0,
            ndim=None,
            dtype=None,
            nstreams=None,
            **kwargs):
    # TODO : need description for parameter 'size', 'ndim', 'nstreams'
    """
    Sample a tensor of given size whose elements are drawn from a uniform
    distribution between low and high.

    If the size argument is ambiguous on the number of dimensions,
    ndim may be a plain integer to supplement the missing information.

    Parameters
    ----------
    low
        Lower bound of the interval on which values are sampled.
        If the ``dtype`` arg is provided, ``low`` will be cast into
        dtype. This bound is excluded.
    high
        Higher bound of the interval on which values are sampled.
        If the ``dtype`` arg is provided, ``high`` will be cast into
        dtype. This bound is excluded.
    size
        Can be a list of integer or Aesara variable (ex: the shape
        of other Aesara Variable).
        NOTE(review): the code below only accepts a ``tuple`` of ints /
        variables, or a 1-dimensional variable; a plain list raises
        ``TypeError``.
    ndim
        Passed on to ``mrg_uniform.new``.
    dtype
        The output data type. If dtype is not specified, it will be
        inferred from the dtype of low and high, but will be at least as
        precise as floatX.
    nstreams
        Number of streams; when None it is computed with
        ``self.n_streams(size)``.
    kwargs
        Only ``target`` is accepted (forwarded to ``shared``); any other
        keyword raises ``TypeError``.

    Raises
    ------
    ValueError
        If `size` is a tuple containing an integer dimension <= 0.
    TypeError
        If `size` is neither a tuple nor a 1-dimensional variable, or if
        unexpected keyword arguments are given.
    NotImplementedError
        If scaling by `low` / `high` changes the broadcastable pattern of
        the result.

    """
    low = as_tensor_variable(low)
    high = as_tensor_variable(high)

    if dtype is None:
        dtype = aes.upcast(config.floatX, low.dtype, high.dtype)

    low = cast(low, dtype=dtype)
    high = cast(high, dtype=dtype)

    low = undefined_grad(low)
    high = undefined_grad(high)

    if isinstance(size, tuple):
        msg = "size must be a tuple of int or an Aesara variable"
        assert all(
            isinstance(i, (np.integer, int, Variable)) for i in size), msg
        # Only concrete integers can be range-checked here.
        if any(isinstance(i, (np.integer, int)) and i <= 0 for i in size):
            raise ValueError(
                "The specified size contains a dimension with value <= 0",
                size)

    else:
        if not (isinstance(size, Variable) and size.ndim == 1):
            raise TypeError("size must be a tuple of int or an Aesara "
                            "Variable with 1 dimension, got " + str(size) +
                            " of type " + str(type(size)))
    # Keep the caller's value: `pretty_return` receives it unchanged.
    orig_nstreams = nstreams
    if nstreams is None:
        nstreams = self.n_streams(size)
    rstates = self.get_substream_rstates(nstreams, dtype)

    d = {}
    if "target" in kwargs:
        d = dict(target=kwargs.pop("target"))
    if len(kwargs) > 0:
        raise TypeError(
            f"uniform() got unexpected keyword arguments {kwargs.keys()}")
    node_rstate = shared(rstates, **d)
    u = self.pretty_return(
        node_rstate,
        *mrg_uniform.new(node_rstate, ndim, dtype, size),
        size=size,
        nstreams=orig_nstreams,
    )
    # Add a reference to distinguish from other shared variables
    node_rstate.tag.is_rng = True
    # Rescale the raw draws to the requested interval.
    r = u * (high - low) + low

    if u.type.broadcastable != r.type.broadcastable:
        raise NotImplementedError(
            "Increase the size to match the broadcasting pattern of "
            "`low` and `high` arguments")

    assert r.dtype == dtype
    return r
def test_pool2d():
    """Compare GPU pooling (forward, grad, R-op and grad-of-grad) with the reference mode.

    For every combination of shape, pooling mode, window size, stride and
    padding that passes the filters below, the graph is compiled once with
    the GPU mode (cuDNN excluded) and once with the reference mode. The
    test asserts that the expected op types appear in each compiled graph
    and that the numerical results agree.
    """
    shps = [
        (1, 12),
        (1, 1, 12),
        (1, 1, 1, 12),
        (1, 1, 2, 2),
        (1, 1, 1, 1),
        (1, 1, 4, 4),
        (1, 1, 10, 11),
        (1, 2, 2, 2),
        (3, 5, 4, 4),
        (25, 1, 7, 7),
        (1, 1, 12, 12),
        (1, 1, 2, 14),
        (1, 1, 12, 14),
        (1, 1, 14, 14),
        (1, 1, 16, 16),
        (1, 1, 18, 18),
        (1, 1, 24, 24),
        (1, 6, 24, 24),
        (10, 1, 24, 24),
        (10, 6, 24, 24),
        (30, 6, 12, 12),
        (30, 2, 24, 24),
        (30, 6, 24, 24),
        (10, 10, 10, 11),
        (1, 1, 10, 1025),
        (1, 1, 10, 1023),
        (1, 1, 1025, 10),
        (1, 1, 1023, 10),
        (3, 2, 16, 16, 16),
        (3, 2, 6, 6, 6, 5),
        (3, 2, 6, 6, 6, 5, 7),
    ]

    # Shuffle the shapes with the test-suite seed.
    np.random.RandomState(utt.fetch_seed()).shuffle(shps)
    test_ws = (2, 2), (3, 2), (1, 1)
    test_st = (2, 2), (3, 2), (1, 1)
    test_mode = ["max", "sum", "average_inc_pad", "average_exc_pad"]

    ref_mode = copy.copy(mode_without_gpu)
    ref_mode.check_py_code = False
    gpu_mode = mode_with_gpu.excluding("cudnn")
    gpu_mode.check_py_code = False

    for shp in shps:
        for mode, ws, st in itertools.product(test_mode, test_ws, test_st):
            # Skip windows larger than the two trailing dimensions.
            if ws[0] > shp[-2] or ws[1] > shp[-1]:
                continue
            # Padding (1, 1) is paired with ignore_border=True, no padding
            # with ignore_border=False.
            for ignore_border, pad in zip((True, False), [(1, 1), (0, 0)]):
                if pad[0] >= ws[0] or pad[1] >= ws[1]:
                    continue
                if mode == "average_exc_pad" and (pad[0] > 0 or pad[1] > 0):
                    continue
                # print('test_pool2d', shp, ws, st, pad, mode, ignore_border)
                ds_op = Pool(ndim=len(ws), mode=mode, ignore_border=ignore_border)

                a = aesara.shared(rand(*shp), "a")
                a_pooled = ds_op(aet.as_tensor_variable(a), ws, st, pad)

                # Forward pass: GPU graph vs. reference graph.
                f = aesara.function([], a_pooled, mode=gpu_mode)
                f2 = aesara.function([], a_pooled, mode=ref_mode)

                assert any(
                    [isinstance(node.op, GpuPool) for node in f.maker.fgraph.toposort()]
                )
                assert any(
                    [isinstance(node.op, Pool) for node in f2.maker.fgraph.toposort()]
                )
                assert np.allclose(f(), f2()), (shp, ws, st, pad, mode, ignore_border)

                # Gradient of the summed pooled output w.r.t. the input.
                a_pooled_grad = grad(a_pooled.sum(), a)

                g = aesara.function([], a_pooled_grad, mode=gpu_mode)
                g2 = aesara.function([], a_pooled_grad, mode=ref_mode)

                if mode == "max":
                    gop = GpuMaxPoolGrad
                    gop2 = MaxPoolGrad
                else:
                    gop = GpuAveragePoolGrad
                    gop2 = AveragePoolGrad
                assert any(
                    [isinstance(node.op, gop) for node in g.maker.fgraph.toposort()]
                )
                assert any(
                    [isinstance(node.op, gop2) for node in g2.maker.fgraph.toposort()]
                )

                assert np.allclose(g(), g2()), (shp, ws, st, pad, mode, ignore_border)

                # test rop and grad grad for max pooling
                # for average pooling grad grad is just average pooling grad
                if mode != "max":
                    continue

                ea = aesara.shared(rand(*shp), "ea")

                gr = aesara.function([], Rop(a_pooled, a, ea), mode=gpu_mode)
                gr2 = aesara.function([], Rop(a_pooled, a, ea), mode=ref_mode)

                assert any(
                    [
                        isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                        for node in gr.maker.fgraph.toposort()
                    ]
                )
                assert any(
                    [
                        isinstance(node.op, DownsampleFactorMaxGradGrad)
                        for node in gr2.maker.fgraph.toposort()
                    ]
                )
                assert np.allclose(gr(), gr2()), (shp, ws, st, pad, mode, ignore_border)

                # Grad of grad, obtained with an L-op on the first gradient.
                ggf = Lop(grad((a_pooled ** 2).sum(), a), a, a)

                gg = aesara.function([], ggf, mode=gpu_mode)
                gg2 = aesara.function([], ggf, mode=ref_mode)

                assert any(
                    [
                        isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                        for node in gg.maker.fgraph.toposort()
                    ]
                )
                assert any(
                    [
                        isinstance(node.op, DownsampleFactorMaxGradGrad)
                        for node in gg2.maker.fgraph.toposort()
                    ]
                )
                assert np.allclose(gg(), gg2()), (shp, ws, st, pad, mode, ignore_border)
def logpt(
    var: TensorVariable,
    rv_values: Optional[Union[TensorVariable, Dict[TensorVariable, TensorVariable]]] = None,
    *,
    jacobian: bool = True,
    scaling: bool = True,
    transformed: bool = True,
    sum: bool = True,
    **kwargs,
) -> TensorVariable:
    """Create a measure-space (i.e. log-likelihood) graph for a random variable
    or a list of random variables at a given point.

    The input `var` determines which log-likelihood graph is used and
    `rv_values` is that graph's input parameter.  For example, if `var` is
    the output of a ``NormalRV`` ``Op``, then the output is a graph of the
    density function for `var` set to the value `rv_values`.

    Parameters
    ==========
    var
        The `RandomVariable` output that determines the log-likelihood graph.
        Can also be a list of variables. The final log-likelihood graph will
        be the sum total of all individual log-likelihood graphs of variables
        in the list.
    rv_values
        A variable, or ``dict`` of variables, that represents the value of
        `var` in its log-likelihood. If no `rv_values` is provided,
        ``var.tag.observations`` and then ``var.tag.value_var`` will be
        checked and, when available, used.  A single (non-mapping) value is
        only used when `var` holds exactly one variable.
    jacobian
        Whether or not to include the Jacobian term.
    scaling
        Whether to multiply each log-likelihood term by the factor that
        ``_get_scaling`` computes from the variable's ``tag.total_size``.
    transformed
        Apply transforms.
    sum
        Sum the log-likelihood.
    **kwargs
        Forwarded to ``factorized_joint_logprob``.

    Returns
    =======
    The log-likelihood graph, or ``None`` when no log-likelihood term matches
    the requested value variables.
    """
    # TODO: In future when we drop support for tag.value_var most of the following
    # logic can be removed and logpt can just be a wrapper function that calls aeppl's
    # joint_logprob directly.

    # If var is not a list make it one.
    if not isinstance(var, list):
        var = [var]

    # If logpt isn't provided values and the variable (provided in var)
    # is an RV, it is assumed that the tagged value var or observation is
    # the value variable for that particular RV.
    if rv_values is None:
        rv_values = {}
        for _var in var:
            if isinstance(_var.owner.op, RandomVariable):
                # Accumulate one entry per RV.  Rebinding the whole dict here
                # would silently drop every variable except the last one.
                rv_values[_var] = getattr(
                    _var.tag, "observations", getattr(_var.tag, "value_var", _var)
                )
    elif not isinstance(rv_values, Mapping):
        # Else if we're given a single value and a single variable we assume a mapping among them.
        # NOTE(review): `astype` receives the variable's type object rather
        # than its dtype; left as found - confirm this is intended.
        rv_values = (
            {var[0]: at.as_tensor_variable(rv_values).astype(var[0].type)}
            if len(var) == 1
            else {}
        )

    # Since the filtering of logp graph is based on value variables
    # provided to this function
    if not rv_values:
        warnings.warn("No value variables provided the logp will be an empty graph")

    if scaling:
        rv_scalings = {}
        for _var in var:
            rv_value_var = getattr(
                _var.tag, "observations", getattr(_var.tag, "value_var", _var)
            )
            rv_scalings[rv_value_var] = _get_scaling(
                getattr(_var.tag, "total_size", None),
                rv_value_var.shape,
                rv_value_var.ndim,
            )

    # Aeppl needs all rv-values pairs, not just that of the requested var.
    # Hence we iterate through the graph to collect them.
    tmp_rvs_to_values = rv_values.copy()
    transform_map = {}
    for node in io_toposort(graph_inputs(var), var):
        try:
            curr_vars = [node.default_output()]
        except ValueError:
            curr_vars = node.outputs
        for curr_var in curr_vars:
            rv_value_var = getattr(
                curr_var.tag, "observations", getattr(curr_var.tag, "value_var", None)
            )
            if rv_value_var is None:
                continue
            # An explicitly supplied value wins over the tagged one.
            rv_value = rv_values.get(curr_var, rv_value_var)
            tmp_rvs_to_values[curr_var] = rv_value
            # Along with value variables we also check for transforms if any.
            if hasattr(rv_value_var.tag, "transform") and transformed:
                transform_map[rv_value] = rv_value_var.tag.transform

    transform_opt = TransformValuesOpt(transform_map)
    temp_logp_var_dict = factorized_joint_logprob(
        tmp_rvs_to_values,
        extra_rewrites=transform_opt,
        use_jacobian=jacobian,
        **kwargs,
    )

    # aeppl returns the logpt for every single value term we provided to it. This includes
    # the extra values we plugged in above so we need to filter those out.
    logp_var_dict = {}
    for value_var, _logp in temp_logp_var_dict.items():
        if value_var in rv_values.values():
            logp_var_dict[value_var] = _logp

    # If it's an empty dictionary the logp is None
    if not logp_var_dict:
        logp_var = None
    else:
        # Otherwise apply appropriate scalings and at.add and/or at.sum the
        # graphs accordingly.
        if scaling:
            for _value in logp_var_dict.keys():
                if _value in rv_scalings:
                    logp_var_dict[_value] *= rv_scalings[_value]

        if len(logp_var_dict) == 1:
            logp_var_dict = tuple(logp_var_dict.values())[0]
            if sum:
                logp_var = at.sum(logp_var_dict)
            else:
                logp_var = logp_var_dict
        else:
            if sum:
                logp_var = at.sum([at.sum(factor) for factor in logp_var_dict.values()])
            else:
                logp_var = at.add(*logp_var_dict.values())

    # Recompute test values for the changes introduced by the replacements
    # above.  There is no graph to walk when no logp term was selected.
    if logp_var is not None and config.compute_test_value != "off":
        for node in io_toposort(graph_inputs((logp_var,)), (logp_var,)):
            compute_test_value(node)

    return logp_var
def test_broadcast_shape():
    """Check `broadcast_shape` against NumPy's broadcasting.

    Several pairs of arrays are broadcast both as tensor variables and as
    explicit shape tuples (``arrays_are_shapes=True``), and the evaluated
    result is compared with ``np.broadcast(...).shape``.  The test also
    checks whether `Assert` nodes appear in the resulting graphs.
    """

    def shape_tuple(x, use_bcast=True):
        # With `use_bcast`, broadcastable dimensions are replaced by the
        # constant 1; otherwise the symbolic shape entries are used as-is.
        if not use_bcast:
            return tuple(x.shape)
        return tuple(
            1 if bcast else s for s, bcast in zip(tuple(x.shape), x.broadcastable)
        )

    def evaluated(shape):
        return [dim.eval() for dim in shape]

    def contains_assert(inputs, outputs):
        return any(
            isinstance(node.op, Assert) for node in applys_between(inputs, outputs)
        )

    x = np.array([[1], [2], [3]])
    y = np.array([4, 5, 6])
    expected = np.broadcast(x, y).shape
    x_var = aet.as_tensor_variable(x)
    y_var = aet.as_tensor_variable(y)
    result = broadcast_shape(x_var, y_var)
    assert np.array_equal(evaluated(result), expected)

    # Now, we try again using shapes as the inputs
    #
    # This case also confirms that a broadcast dimension will
    # broadcast against a non-broadcast dimension when they're
    # both symbolic (i.e. we couldn't obtain constant values).
    result = broadcast_shape(
        shape_tuple(x_var, use_bcast=False),
        shape_tuple(y_var, use_bcast=False),
        arrays_are_shapes=True,
    )
    assert contains_assert([x_var, y_var], result)
    assert np.array_equal(evaluated(result), expected)

    result = broadcast_shape(
        shape_tuple(x_var), shape_tuple(y_var), arrays_are_shapes=True
    )
    assert np.array_equal(evaluated(result), expected)
    # These are all constants, so there shouldn't be any asserts in the
    # resulting graph.
    assert not contains_assert([x_var, y_var], result)

    x = np.array([1, 2, 3])
    y = np.array([4, 5, 6])
    expected = np.broadcast(x, y).shape
    x_var = aet.as_tensor_variable(x)
    y_var = aet.as_tensor_variable(y)
    result = broadcast_shape(x_var, y_var)
    assert np.array_equal(evaluated(result), expected)

    result = broadcast_shape(
        shape_tuple(x_var), shape_tuple(y_var), arrays_are_shapes=True
    )
    assert np.array_equal(evaluated(result), expected)
    # TODO: This will work when/if we use a more sophisticated `is_same_graph`
    # implementation.
    # assert not any(
    #     isinstance(node.op, Assert)
    #     for node in graph_ops([x_aet, y_aet], b_aet)
    # )

    x = np.empty((1, 2, 3))
    y = np.array(1)
    expected = np.broadcast(x, y).shape
    x_var = aet.as_tensor_variable(x)
    y_var = aet.as_tensor_variable(y)
    result = broadcast_shape(x_var, y_var)
    assert result[0].value == 1
    assert np.array_equal(evaluated(result), expected)
    assert not contains_assert([x_var, y_var], result)

    result = broadcast_shape(
        shape_tuple(x_var), shape_tuple(y_var), arrays_are_shapes=True
    )
    assert np.array_equal(evaluated(result), expected)

    x = np.empty((2, 1, 3))
    y = np.empty((2, 1, 1))
    expected = np.broadcast(x, y).shape
    x_var = aet.as_tensor_variable(x)
    y_var = aet.as_tensor_variable(y)
    result = broadcast_shape(x_var, y_var)
    assert result[1].value == 1
    assert np.array_equal(evaluated(result), expected)
    # TODO: This will work when/if we use a more sophisticated `is_same_graph`
    # implementation.
    # assert not any(
    #     isinstance(node.op, Assert)
    #     for node in graph_ops([x_aet, y_aet], b_aet)
    # )

    result = broadcast_shape(
        shape_tuple(x_var), shape_tuple(y_var), arrays_are_shapes=True
    )
    assert np.array_equal(evaluated(result), expected)

    # Fully symbolic shapes.
    x1_len = iscalar("x1")
    x2_len = iscalar("x2")
    y1_len = iscalar("y1")
    x_var = aet.ones((1, x1_len, x2_len))
    y_var = aet.ones((y1_len, 1, x2_len))
    result = broadcast_shape(x_var, y_var)
    # TODO: This will work when/if we use a more sophisticated `is_same_graph`
    # implementation.
    # assert not any(
    #     isinstance(node.op, Assert)
    #     for node in graph_ops([x_aet, y_aet], b_aet)
    # )
    res = aet.as_tensor(result).eval({x1_len: 10, x2_len: 4, y1_len: 2})
    assert np.array_equal(res, (2, 10, 4))

    # The last dimensions are now two different symbolic scalars, so an
    # `Assert` is expected on the last output dimension.
    y_var = aet.ones((y1_len, 1, y1_len))
    result = broadcast_shape(x_var, y_var)
    assert isinstance(result[-1].owner.op, Assert)
def logcdf(rv, value):
    """Build the log-CDF graph of the random variable `rv` at `value`.

    `value` is first converted to a tensor variable with the dtype of `rv`
    and the pair is then handed to ``logcdf_aeppl``.
    """
    value_tensor = at.as_tensor_variable(value, dtype=rv.dtype)
    return logcdf_aeppl(rv, value_tensor)
def make_node(self, epsilon, obs_data, sim_data):
    """Build the `Apply` node for this op.

    The three arguments are converted to tensor variables, in order, and
    become the node's inputs; the node has a single ``vectorX()`` output.
    """
    node_inputs = [
        at.as_tensor_variable(arg) for arg in (epsilon, obs_data, sim_data)
    ]
    return Apply(self, node_inputs, [vectorX()])