def moment(rv, size, mu, sigma, init, steps):
    """Build a moment graph as the cumulative sum of per-step increments.

    The increment at position 0 of the last axis is ``moment(init)``; every
    later position holds ``mu``.  The cumulative sum over the last axis is
    returned.  ``size``, ``sigma`` and ``steps`` are accepted but not used.

    NOTE(review): ``moment(init)`` resolves to whatever ``moment`` is bound
    in the enclosing module scope -- presumably a moment dispatcher rather
    than this function; confirm against the surrounding file.
    """
    increments = at.zeros_like(rv)
    first_slot = increments[..., 0]
    increments = at.set_subtensor(first_slot, moment(init))
    # A trailing axis is appended to ``mu`` so that it broadcasts safely
    # along the steps dimension.
    remaining_slots = increments[..., 1:]
    increments = at.set_subtensor(remaining_slots, mu[..., None])
    return at.cumsum(increments, axis=-1)
def test_wrong_dims(self):
    """Setting or incrementing ``a[0]`` of a matrix with a matrix raises ``TypeError``."""
    target = tt.matrix()
    update = tt.matrix()
    row = 0
    for subtensor_fn in (tt.set_subtensor, tt.inc_subtensor):
        with pytest.raises(TypeError):
            subtensor_fn(target[row], update)
def L(self):
    """Return a zero-initialised tensor with ``params_dict["L_tril"]`` written
    at ``self.tril_indices``.

    When ``self.batched`` is false the result has shape ``(ddim, ddim)``.
    When it is true the tensor is built as ``(ddim, ddim, bdim)`` from the
    transposed parameters and then reordered with ``dimshuffle(2, 0, 1)``.
    """
    if not self.batched:
        dense = at.zeros((self.ddim, self.ddim))
        return at.set_subtensor(
            dense[self.tril_indices], self.params_dict["L_tril"]
        )

    dense = at.zeros((self.ddim, self.ddim, self.bdim))
    dense = at.set_subtensor(
        dense[self.tril_indices], self.params_dict["L_tril"].T
    )
    # Move the trailing (batch) axis to the front.
    return dense.dimshuffle(2, 0, 1)
def test_extract_obs_data():
    """Exercise ``extract_obs_data`` on constant, shared, masked and cast inputs."""

    def check_masked(values, values_at, missing):
        # Express a masked array as "filled constant with symbolic entries
        # set back in at the masked positions" and check it is recovered.
        masked = np.ma.MaskedArray(values, missing)
        missing_at = values_at.type()[missing]
        filled_at = at.as_tensor(masked.filled())
        z_at = at.set_subtensor(filled_at[missing.nonzero()], missing_at)
        assert isinstance(
            z_at.owner.op, (AdvancedIncSubtensor, AdvancedIncSubtensor1)
        )
        extracted = extract_obs_data(z_at)
        assert isinstance(extracted, np.ndarray)
        assert np.ma.allequal(extracted, masked)

    # A bare symbolic matrix must be rejected.
    with pytest.raises(TypeError):
        extract_obs_data(at.matrix())

    matrix_data = np.random.normal(size=(2, 3))
    matrix_at = at.as_tensor(matrix_data)
    matrix_mask = np.random.binomial(1, 0.5, size=(2, 3)).astype(bool)

    for candidate in (matrix_at, aesara.shared(matrix_data)):
        extracted = extract_obs_data(candidate)
        assert isinstance(extracted, np.ndarray)
        assert np.array_equal(extracted, matrix_data)

    # AdvancedIncSubtensor check
    check_masked(matrix_data, matrix_at, matrix_mask)

    # AdvancedIncSubtensor1 check
    vector_data = np.random.normal(size=(3, ))
    vector_at = at.as_tensor(vector_data)
    vector_mask = np.random.binomial(1, 0.5, size=(3, )).astype(bool)
    check_masked(vector_data, vector_at, vector_mask)

    # Cast check
    expected = np.array(5)
    cast_at = at.cast(at.as_tensor(5.0), np.int64)
    extracted = extract_obs_data(cast_at)
    assert isinstance(extracted, np.ndarray)
    assert np.array_equal(extracted, expected)
def L(self):
    """Return the tensor built from ``params_dict["L_tril"]`` with its
    diagonal passed through ``rho2sigma``.

    A zero tensor is filled at ``self.tril_indices`` (shape ``(ddim, ddim)``,
    or ``(ddim, ddim, bdim)`` reordered by ``dimshuffle(2, 0, 1)`` when
    ``self.batched``).  The entries on the diagonal of the last two axes are
    then replaced by ``rho2sigma`` of themselves.
    """
    packed = self.params_dict["L_tril"]
    if self.batched:
        factor = at.zeros((self.ddim, self.ddim, self.bdim))
        factor = at.set_subtensor(factor[self.tril_indices], packed.T)
        factor = factor.dimshuffle(2, 0, 1)
    else:
        factor = at.zeros((self.ddim, self.ddim))
        factor = at.set_subtensor(factor[self.tril_indices], packed)

    # Select the diagonal of the last two axes and transform it in place
    # (symbolically) with ``rho2sigma``.
    diag_pos = np.arange(self.ddim)
    diagonal = factor[..., diag_pos, diag_pos]
    return at.set_subtensor(diagonal, rho2sigma(diagonal))
def test_AdvancedIncSubtensor1(x, y, indices):
    """Compare Numba and Python results for ``AdvancedIncSubtensor1`` graphs."""
    expected_op = aet_subtensor.AdvancedIncSubtensor1

    # Out-of-place set/inc, compiled as graphs without explicit inputs.
    for build in (aet.set_subtensor, aet.inc_subtensor):
        out = build(x[indices], y)
        assert isinstance(out.owner.op, expected_op)
        compare_numba_and_py(FunctionGraph([], [out]), [])

    # In-place set on a fresh variable of ``x``'s type, fed with ``x.data``.
    x_in = x.type()
    out = aet.set_subtensor(x_in[indices], y, inplace=True)
    assert isinstance(out.owner.op, expected_op)
    compare_numba_and_py(FunctionGraph([x_in], [out]), [x.data])
def make_node(self, inp, s=None):
    """Create the ``Apply`` node for this op from ``inp`` and an optional shape ``s``.

    ``s`` is taken as an explicit input so that odd transform sizes can be
    handled.  It could later be extended to padding and truncation in the
    manner of numpy's interface, but cuFFT expects arrays matching the shape
    given to the plan, so padding would have to happen inside the op; the
    effect of padding on gradients has not been investigated.

    Raises
    ------
    RuntimeError
        If skcuda, pygpu or pycuda is flagged as unavailable.
    """
    requirements = (
        (skcuda_available, "skcuda is needed for CuIFFTOp"),
        (pygpu_available, "pygpu is needed for CuIFFTOp"),
        (pycuda_available, "pycuda is needed for CuIFFTOp"),
    )
    for available, message in requirements:
        if not available:
            raise RuntimeError(message)

    inp = gpu_contiguous(as_gpuarray_variable(inp, infer_context_name(inp)))

    if s is None:
        # No shape given: derive it assuming an even real transform.
        s = inp.shape[1:-1]
        s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2)
    s = tt.as_tensor_variable(s)

    assert inp.dtype == "float32"
    assert s.ndim == 1

    outputs = [self.output_type(inp)()]
    return aesara.Apply(self, [inp, s], outputs)
def test_simple_2d(self):
    """Set/increment ``a[:, :end]`` by a scalar, where ``end`` is a symbolic scalar."""
    a = tt.dmatrix()
    increment = tt.dscalar()
    sl2_end = tt.lscalar()
    full = slice(None)
    partial_sl = slice(sl2_end)

    # Concrete inputs shared by both variants.
    val_a = np.ones((5, 5))
    val_inc = 2.3
    val_sl2_end = 2

    for do_set in [False, True]:
        subtensor_fn = tt.set_subtensor if do_set else tt.inc_subtensor
        symbolic_out = subtensor_fn(a[full, partial_sl], increment)
        f = aesara.function([a, increment, sl2_end], symbolic_out)

        actual = f(val_a, val_inc, val_sl2_end)

        expected = np.copy(val_a)
        if do_set:
            expected[:, :val_sl2_end] = val_inc
        else:
            expected[:, :val_sl2_end] += val_inc

        utt.assert_allclose(actual, expected)
def test_logpt_incsubtensor(indices, size):
    """Make sure we can compute a log-likelihood for ``Y[idx] = data`` where
    ``Y`` is univariate."""
    n_elems = np.prod(size)
    mu = floatX(np.power(10, np.arange(n_elems))).reshape(size)
    data = mu[indices]
    sigma = 0.001

    np_rng = np.random.RandomState(232)
    a_val = np_rng.normal(mu, sigma, size=size).astype(aesara.config.floatX)
    shared_rng = aesara.shared(np_rng, borrow=False)

    a = Normal.dist(mu, sigma, size=size, rng=shared_rng)
    a_value_var = a.type()
    a.name = "a"

    a_idx = at.set_subtensor(a[indices], data)
    expected_ops = (IncSubtensor, AdvancedIncSubtensor, AdvancedIncSubtensor1)
    assert isinstance(a_idx.owner.op, expected_ops)

    a_idx_value_var = a_idx.type()
    a_idx_value_var.name = "a_idx_value"

    a_idx_logp = logpt(a_idx, {a_idx: a_value_var}, sum=False)
    logp_vals = a_idx_logp.eval({a_value_var: a_val})

    # Every mean is unique, so the entries that were set can only reproduce
    # the reference log-density if the assigned values are matched with the
    # parameters at the same positions.
    a_val_idx = a_val.copy()
    a_val_idx[indices] = data
    exp_obs_logps = sp.norm.logpdf(a_val_idx, mu, sigma)

    np.testing.assert_almost_equal(logp_vals, exp_obs_logps)
def cuirfft(inp, norm=None, is_odd=False):
    r"""
    Performs the inverse fast Fourier Transform with real-valued output on the GPU.

    The input is a variable of dimensions (m, ..., n//2+1, 2) with
    type float32 representing the non-trivial elements of m
    real-valued Fourier transforms of initial size (..., n). The real and
    imaginary parts are stored as a pair of float32 arrays.

    The output is a real-valued float32 variable of dimensions (m, ..., n)
    giving the m inverse FFTs.

    Parameters
    ----------
    inp
        Array of float32 of size (m, ..., n//2+1, 2), containing m inputs
        with n//2+1 non-trivial elements on the last dimension and real
        and imaginary parts stored as separate arrays.
    norm : {None, 'ortho', 'no_norm'}
        Normalization of transform. Following numpy, default *None* normalizes
        only the inverse transform by n, 'ortho' yields the unitary transform
        (:math:`1/\sqrt n` forward and inverse). In addition, 'no_norm' leaves
        the transform unnormalized.
    is_odd : {True, False}
        Set to True to get a real inverse transform output with an odd last dimension
        of length (N-1)*2 + 1 for an input last dimension of length N.

    Raises
    ------
    ValueError
        If `is_odd` does not compare equal to True or False.

    """
    if is_odd not in (True, False):
        # The value is wrapped in a 1-tuple so that a tuple-valued `is_odd`
        # is formatted as a whole instead of being unpacked by `%`.
        raise ValueError(
            "Invalid value %s for is_odd, must be True or False" % (is_odd,)
        )

    # Output shape: the transformed axes of the input, with the last one
    # expanded back from N to (N-1)*2 (even) or (N-1)*2 + 1 (odd).
    s = inp.shape[1:-1]
    if is_odd:
        s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1)
    else:
        s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2)

    cond_norm = _unitary(norm)
    scaling = 1
    if cond_norm is None:
        scaling = s.prod().astype("float32")
    elif cond_norm == "ortho":
        scaling = tt.sqrt(s.prod().astype("float32"))

    return cuirfft_op(inp, s) / scaling
def test_inplace(self):
    """Make sure that in-place optimizations are *not* performed on the
    output of a ``BroadcastTo``."""
    picked = np.r_[0, 1, 3]

    base = aet.zeros((5, ))
    d = aet.vector("d")
    filled = aet.set_subtensor(base[picked], d)
    broadcasted = broadcast_to(filled, (5, ))
    out = aet.set_subtensor(broadcasted[np.r_[0, 1, 3]], np.r_[0, 0, 0])

    # Compile with only the "inplace" rewrites enabled.
    py_mode = Mode("py", Query(include=["inplace"]))
    out_fn = function([d], out, mode=py_mode)

    advincsub_node = out_fn.maker.fgraph.outputs[0].owner
    assert isinstance(advincsub_node.op, AdvancedIncSubtensor1)
    assert isinstance(advincsub_node.inputs[0].owner.op, BroadcastTo)
    assert advincsub_node.op.inplace is False
def test_AdvancedIncSubtensor(x, y, indices):
    """Compare Numba and Python results for ``AdvancedIncSubtensor`` graphs."""
    expected_op = aet_subtensor.AdvancedIncSubtensor

    # Out-of-place set/inc, compiled as graphs without explicit inputs.
    for build in (aet.set_subtensor, aet.inc_subtensor):
        out = build(x[indices], y)
        assert isinstance(out.owner.op, expected_op)
        compare_numba_and_py(FunctionGraph([], [out]), [])

    x_in = x.type()
    out = aet.set_subtensor(x_in[indices], y)
    # `AdvancedIncSubtensor` has no real in-place implementation, so the
    # flag is simply forced on the op here.
    out.owner.op.inplace = True
    assert isinstance(out.owner.op, expected_op)
    compare_numba_and_py(FunctionGraph([x_in], [out]), [x.data])
def grad(self, inputs, output_grads):
    """Return ``[cuirfft_op(scaled_gout, s), disconnected]``.

    The output gradient is halved on part of its second-to-last axis and
    then passed through ``cuirfft_op``; the shape input ``s`` gets a
    ``DisconnectedType`` gradient.
    """
    (out_grad, ) = output_grads
    shape = inputs[1]

    # The real inverse FFT double-counts these entries because of symmetry,
    # so they are divided by 2.  The first element, and the last one for
    # even transforms, are unique and are left untouched.
    last_len = shape[-1]
    stop = (last_len // 2) + (last_len % 2)
    idx = [slice(None) for _ in range(out_grad.ndim - 2)]
    idx.append(slice(1, stop))
    idx.append(slice(None))

    out_grad = tt.set_subtensor(out_grad[idx], out_grad[idx] * 0.5)
    return [cuirfft_op(out_grad, shape), DisconnectedType()()]
def expand_packed_triangular(n, packed, lower=True, diagonal_only=False):
    r"""Convert a packed triangular matrix into a two dimensional array.

    Triangular matrices can be stored with better space efficiency by
    storing the non-zero values in a one-dimensional array. We number
    the elements by row like this (for lower or upper triangular matrices):

        [[0 - - -]     [[0 1 2 3]
         [1 2 - -]      [- 4 5 6]
         [3 4 5 -]      [- - 7 8]
         [6 7 8 9]]     [- - - 9]]

    Parameters
    ----------
    n: int
        The number of rows of the triangular matrix.
    packed: aesara.vector
        The matrix in packed format.
    lower: bool, default=True
        If true, assume that the matrix is lower triangular.
    diagonal_only: bool
        If true, return only the diagonal of the matrix.

    Raises
    ------
    ValueError
        If ``packed`` is not one dimensional.
    TypeError
        If ``n`` is not an ``int``.
    """
    if packed.ndim != 1:
        raise ValueError("Packed triangular is not one dimensional.")
    if not isinstance(n, int):
        raise TypeError("n must be an integer")

    if diagonal_only:
        # Positions of the diagonal entries inside the packed vector: row
        # lengths grow 1..n for the lower layout and shrink n..1 for the
        # upper layout.
        if lower:
            diag_positions = np.arange(1, n + 1).cumsum() - 1
        else:
            diag_positions = np.arange(2, n + 2)[::-1].cumsum() - n - 1
        return packed[diag_positions]

    dense = at.zeros((n, n), dtype=aesara.config.floatX)
    positions = np.tril_indices(n) if lower else np.triu_indices(n)
    return at.set_subtensor(dense[positions], packed)
def grad(self, inputs, output_grads):
    """Return ``[scaled curfft_op(gout, s), disconnected]``.

    The output gradient is passed through ``curfft_op`` and part of the
    result's second-to-last axis is doubled; the shape input ``s`` gets a
    ``DisconnectedType`` gradient.
    """
    (out_grad, ) = output_grads
    shape = inputs[1]
    freq_grad = curfft_op(out_grad, shape)

    # These entries stand for both positive and negative frequencies, so
    # they are multiplied by 2.  The first element, and the last one for
    # even transforms, are unique and are left untouched.
    last_len = shape[-1]
    stop = (last_len // 2) + (last_len % 2)
    idx = [slice(None) for _ in range(freq_grad.ndim - 2)]
    idx.append(slice(1, stop))
    idx.append(slice(None))

    freq_grad = tt.set_subtensor(freq_grad[idx], freq_grad[idx] * 2)
    return [freq_grad, DisconnectedType()()]
def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001,
                   epsilon=0.1, n_win=10):
    """Return Adagrad-style updates normalised over a running window.

    Rather than accumulating squared gradients over all steps, the squared
    gradients of the last ``n_win`` steps are kept in a ring buffer and
    summed.  When called with neither ``loss_or_grads`` nor ``params``, a
    partially-applied version of this function is returned instead.

    Parameters
    ----------
    loss_or_grads: symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params: list of shared variables
        The variables to generate update expressions for
    learning_rate: float
        Learning rate.
    epsilon: float
        Offset to avoid zero-division in the normalizer of adagrad.
    n_win: int
        Number of past steps to calculate scales of parameter gradients.

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression

    Raises
    ------
    ValueError
        If exactly one of ``loss_or_grads`` and ``params`` is given.
    """
    # NOTE: `locals()` must be read before any other local name is bound,
    # so this guard has to stay the first statement.
    if loss_or_grads is None and params is None:
        return partial(adagrad_window, **_get_call_kwargs(locals()))
    if loss_or_grads is None or params is None:
        raise ValueError(
            "Please provide both `loss_or_grads` and `params` to get updates")

    updates = OrderedDict()
    grads = get_or_compute_grads(loss_or_grads, params)
    for param, grad in zip(params, grads):
        # Write position inside the window, stored as a float shared
        # variable and cast for indexing.
        slot = aesara.shared(pm.floatX(0))
        slot_int = slot.astype("int32")

        current = param.get_value(borrow=True)
        window = aesara.shared(
            np.zeros(current.shape + (n_win, ), dtype=current.dtype))

        # Store this step's squared gradient in the current slot, then move
        # the slot forward, wrapping to 0 at the end of the window.
        window_new = aet.set_subtensor(window[..., slot_int], grad**2)
        slot_new = aet.switch((slot + 1) < n_win, slot + 1, 0)
        updates[window] = window_new
        updates[slot] = slot_new

        scale = aet.sqrt(window_new.sum(axis=-1) + epsilon)
        updates[param] = param - (learning_rate * grad / scale)
    return updates
def logp(self, obs):
    """Return the log-likelihood graph of ``obs``, assembled per component.

    The result starts as ``-inf`` with shape ``obs.shape``.  For each
    component distribution ``i`` in ``self.comp_dists``, the entries where
    ``self.states == i`` are replaced by that component's ``logp`` of the
    matching observations.
    """
    obs_tt = at.as_tensor_variable(obs)
    result = at.alloc(-np.inf, *obs.shape)

    for state, component in enumerate(self.comp_dists):
        in_state = at.eq(self.states, state)
        obs_in_state = obs_tt[in_state]
        # Restrict the component's parameters to the selected entries.
        subset_args = distribution_subset_args(component, obs.shape, in_state)
        state_logp = component.dist(*subset_args).logp(obs_in_state)
        result = at.set_subtensor(result[in_state], state_logp)

    return result
def grad(self, inputs, cost_grad):
    """
    Gradient with respect to the first input only.

    In defining the gradient, the Finite Fourier Transform is viewed as a
    complex-differentiable function of a complex variable.

    Returns ``[gradient, None, None]``; raises ``NotImplementedError`` when
    ``axis`` is not a ``TensorConstant``.
    """
    a, n, axis = inputs[0], inputs[1], inputs[2]
    out_grad = cost_grad[0]

    if not isinstance(axis, tensor.TensorConstant):
        raise NotImplementedError(
            "%s: gradient is currently implemented"
            " only for axis being a Aesara constant" % self.__class__.__name__)
    axis = int(axis.data)

    # The number of elements actually differentiated against does not
    # depend on any padding or truncation ...
    elem = tensor.arange(0, tensor.shape(a)[axis], 1)
    # ... whereas the number of frequencies does (this handles padding).
    freq = tensor.arange(0, n, 1)
    phase = tensor.outer(freq, elem)
    kernel = tensor.exp(((-2 * math.pi * 1j) * phase) / (1.0 * n))
    res = tensor.tensordot(out_grad, kernel, (axis, 0))

    # Truncation: zero everything from index n on along the last axis.
    # Writing `res[..., n::]` directly would be simpler but is not
    # supported, so the axes are reversed, the leading axis is sliced, and
    # the axes are reversed back.
    reversed_axes = list(np.arange(0, a.ndim)[::-1])
    res = res.dimshuffle(reversed_axes)
    truncated = tensor.set_subtensor(res[n::, ], 0, False, False)
    res = tensor.switch(
        tensor.lt(n, tensor.shape(a)[axis]),
        truncated,
        res,
    )
    res = res.dimshuffle(reversed_axes)

    # Move the last axis back to position `axis` so that the gradient's
    # shape conforms to the input's shape.
    leading = list(np.arange(0, axis))
    trailing = list(np.arange(axis, a.ndim - 1))
    out_order = leading + [a.ndim - 1] + trailing
    res = res.dimshuffle(*out_order)
    return [res, None, None]
def expand_empty(tensor_var, size):
    """
    Grow a tensor along its first axis, from shape (d1, d2, ...) to
    (d1 + size, d2, ...), by appending uninitialized memory at the end.

    ``tensor_var`` itself is returned when ``size == 0``.
    """
    if size == 0:
        return tensor_var

    dims = [tensor_var.shape[axis] for axis in range(tensor_var.ndim)]
    old_len = dims[0]
    grown_shape = [size + old_len] + dims[1:]

    buffer = tensor.AllocEmpty(tensor_var.dtype)(*grown_shape)
    expanded = tensor.set_subtensor(buffer[:old_len], tensor_var)
    # The tail is uninitialized on purpose; turn this tag off so the result
    # is not flagged for it.
    expanded.tag.nan_guard_mode_check = False
    return expanded
def incsubtensor_logp(op, var, rvs_to_values, indexed_rv_var, rv_values,
                      *indices, **kwargs):
    """Return ``logpt`` of ``indexed_rv_var`` evaluated at its own clone with
    ``rv_values`` set at the op's index.

    The index is rebuilt from ``op.idx_list`` (``None`` when the op has no
    such attribute) and ``indices``.  ``indexed_rv_var`` is cloned without
    copying inputs or orphans, the clone is wrapped in ``disconnected_grad``,
    and ``rv_values`` is written into it at that index.  Extra keyword
    arguments are forwarded to ``logpt``.
    """
    index = indices_from_subtensor(getattr(op, "idx_list", None), indices)

    non_constant_inputs = tuple(
        inp for inp in graph_inputs((indexed_rv_var, ))
        if not isinstance(inp, Constant)
    )
    _, (rv_clone, ) = clone(
        non_constant_inputs,
        (indexed_rv_var, ),
        copy_inputs=False,
        copy_orphans=False,
    )

    detached = disconnected_grad(rv_clone)
    values_at_index = at.set_subtensor(detached[index], rv_values)
    return logpt(indexed_rv_var, values_at_index, **kwargs)
def make_node(self, a, s=None):
    """Create the ``Apply`` node for this op from ``a`` and an optional shape ``s``.

    When ``s`` is omitted it is derived from ``a.shape[1:-1]`` with the last
    entry replaced by ``(last - 1) * 2``.

    Raises
    ------
    TypeError
        If ``a`` has fewer than 3 dimensions, or if an explicitly given
        ``s`` does not have an integer dtype.
    """
    op_name = self.__class__.__name__
    a = tt.as_tensor_variable(a)
    if a.ndim < 3:
        raise TypeError(
            "%s: input must have dimension >= 3, with " % op_name
            + "first dimension batches and last real/imag parts"
        )

    shape_was_given = s is not None
    if not shape_was_given:
        s = a.shape[1:-1]
        s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2)
    s = tt.as_tensor_variable(s)

    # Only a caller-supplied shape is checked for an integer dtype.
    if shape_was_given and s.dtype not in tt.integer_dtypes:
        raise TypeError(
            "%s: length of the transformed axis must be"
            " of type integer" % op_name
        )

    return gof.Apply(self, [a, s], [self.output_type(a)()])
def test_neibs_half_step_by_valid(self):
    """``mode="half"`` must match ``mode="valid"`` on a zero-padded copy of the input."""
    neib_shapes = ((3, 3), (3, 5), (5, 3))
    cases = [
        [(7, 8, 5, 5), (1, 1)],
        [(7, 8, 5, 5), (2, 2)],
        [(7, 8, 5, 5), (4, 4)],
        [(7, 8, 5, 5), (1, 4)],
        [(7, 8, 5, 5), (4, 1)],
        [(80, 90, 5, 5), (1, 2)],
        [(1025, 9, 5, 5), (2, 1)],
        [(1, 1, 5, 1037), (2, 4)],
        [(1, 1, 1045, 5), (4, 2)],
    ]
    for shape, neib_step in cases:
        for neib_shape in neib_shapes:
            for dtype in self.dtypes:
                x = aesara.shared(
                    np.random.standard_normal(shape).astype(dtype))

                # Pad the last two axes by half a neighbourhood per side.
                pad_h = neib_shape[0] // 2
                pad_w = neib_shape[1] // 2
                padded_shape = (
                    x.shape[0],
                    x.shape[1],
                    x.shape[2] + 2 * pad_h,
                    x.shape[3] + 2 * pad_w,
                )
                canvas = at.zeros(padded_shape)
                interior = canvas[:, :, pad_h:-pad_h, pad_w:-pad_w]
                padded_x = at.set_subtensor(interior, x)

                x_using_valid = images2neibs(
                    padded_x, neib_shape, neib_step, mode="valid")
                x_using_half = images2neibs(
                    x, neib_shape, neib_step, mode="half")

                f_valid = aesara.function([], x_using_valid, mode="FAST_RUN")
                f_half = aesara.function([], x_using_half, mode=self.mode)
                unittest_tools.assert_allclose(f_valid(), f_half())
def test_neibs_full_step_by_valid(self):
    """``mode="full"`` must match ``mode="valid"`` on a zero-padded copy of the input."""
    odd_shapes = ((3, 3), (3, 5), (5, 3))
    mixed_shapes = ((2, 2), (2, 5), (5, 2))
    cases = [
        [(7, 8, 5, 5), (1, 1), odd_shapes],
        [(7, 8, 5, 5), (2, 2), odd_shapes],
        [(7, 8, 6, 6), (3, 3), mixed_shapes],
        [(7, 8, 6, 6), (1, 3), mixed_shapes],
        [(7, 8, 6, 6), (3, 1), mixed_shapes],
        [(80, 90, 5, 5), (1, 2), odd_shapes],
        [(1025, 9, 5, 5), (2, 1), odd_shapes],
        [(1, 1, 11, 1037), (2, 3), ((3, 3), (5, 3))],
        [(1, 1, 1043, 11), (3, 2), ((3, 3), (3, 5))],
    ]
    for shape, neib_step, neib_shapes in cases:
        for neib_shape in neib_shapes:
            for dtype in self.dtypes:
                x = aesara.shared(
                    np.random.standard_normal(shape).astype(dtype))

                # Pad the last two axes by (neighbourhood - 1) per side.
                pad_h = neib_shape[0] - 1
                pad_w = neib_shape[1] - 1
                padded_shape = (
                    x.shape[0],
                    x.shape[1],
                    x.shape[2] + 2 * pad_h,
                    x.shape[3] + 2 * pad_w,
                )
                canvas = at.zeros(padded_shape)
                interior = canvas[:, :, pad_h:-pad_h, pad_w:-pad_w]
                padded_x = at.set_subtensor(interior, x)

                x_using_valid = images2neibs(
                    padded_x, neib_shape, neib_step, mode="valid")
                x_using_full = images2neibs(
                    x, neib_shape, neib_step, mode="full")

                f_valid = aesara.function([], x_using_valid, mode="FAST_RUN")
                f_full = aesara.function([], x_using_full, mode=self.mode)
                unittest_tools.assert_allclose(f_valid(), f_full())
def tt_logsumexp(x, axis=None, keepdims=False):
    """Construct a symbolic graph for a log-sum-exp calculation.

    The maximum over ``axis`` is subtracted before exponentiating and added
    back after the logarithm; infinite maxima are replaced by ``0.0`` first.
    """
    shift = at.max(x, axis=axis, keepdims=True)
    if shift.ndim > 0:
        shift = at.set_subtensor(shift[at.isinf(shift)], 0.0)
    elif at.isinf(shift):
        shift = at.as_tensor(0.0)

    total = at.sum(at.exp(x - shift), axis=axis, keepdims=keepdims)
    total = at.log(total)

    if keepdims:
        return total + shift

    # SciPy squeezes the shift with an `axis` keyword here; that is not
    # supported by the tensor library, so the reduced broadcastable axes are
    # dropped through `dimshuffle` instead.
    reduced_axes = (
        np.atleast_1d(axis) if axis is not None else range(shift.ndim)
    )
    kept_dims = []
    for dim in range(shift.ndim):
        if not shift.broadcastable[dim] or dim not in reduced_axes:
            kept_dims.append(dim)
    shift = shift.dimshuffle(kept_dims)

    return total + shift
def conv3d(signals, filters, signals_shape=None, filters_shape=None,
           border_mode="valid"):
    """
    Convolve spatio-temporal filters with a movie.

    It flips the filters.  The 3D convolution is carried out as one 2D
    convolution over all (signal frame, filter frame) pairs, followed by a
    sum along the diagonals of the (Ts, Tf) sub-matrix.

    Parameters
    ----------
    signals
        Timeseries of images whose pixels have color channels.
        Shape: [Ns, Ts, C, Hs, Ws].
    filters
        Spatio-temporal filters.
        Shape: [Nf, Tf, C, Hf, Wf].
    signals_shape
        None or a tuple/list with the shape of signals.
    filters_shape
        None or a tuple/list with the shape of filters.
    border_mode
        One of 'valid', 'full' or 'half'.  A single string applies to all
        three axes; otherwise a sequence indexed as (time, height, width).

    Raises
    ------
    NotImplementedError
        If the height and width border modes differ, or a mode is 'same'.
    ValueError
        If a border mode is not recognised.

    Notes
    -----
    Another way to define signals: (batch,  time, in channel, row, column)
    Another way to define filters: (out channel,time,in channel, row, column)

    For the GPU, use nnet.conv3d.

    See Also
    --------
    Someone made a script that shows how to swap the axes between
    both 3d convolution implementations in Aesara. See the last
    `attachment <https://groups.google.com/d/msg/aesara-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_

    """
    if isinstance(border_mode, str):
        border_mode = (border_mode, border_mode, border_mode)

    signals_5d = signals.shape if signals_shape is None else signals_shape
    filters_5d = filters.shape if filters_shape is None else filters_shape

    # The channel count used below is the one unpacked from the filters.
    Ns, Ts, _, Hs, Ws = signals_5d
    Nf, Tf, C, Hf, Wf = filters_5d

    signals_4d = (Ns * Ts, C, Hs, Ws)
    filters_4d = (Nf * Tf, C, Hf, Wf)

    if border_mode[1] != border_mode[2]:
        raise NotImplementedError("height and width bordermodes must match")

    # Static shapes are only forwarded to conv2d when the caller gave them.
    out_4d = tensor.nnet.conv2d(
        signals.reshape(signals_4d),
        filters.reshape(filters_4d),
        input_shape=None if signals_shape is None else signals_4d,
        filter_shape=None if filters_shape is None else filters_4d,
        border_mode=border_mode[1],
    )  # border_mode[2] is ignored: it equals border_mode[1]

    # Spatial size of the 2D convolution output.
    spatial_mode = border_mode[1]
    if spatial_mode == "valid":
        Hout = Hs - Hf + 1
        Wout = Ws - Wf + 1
    elif spatial_mode == "full":
        Hout = Hs + Hf - 1
        Wout = Ws + Wf - 1
    elif spatial_mode == "half":
        Hout = Hs - (Hf % 2) + 1
        Wout = Ws - (Wf % 2) + 1
    elif spatial_mode == "same":
        raise NotImplementedError()
    else:
        raise ValueError("invalid border mode", border_mode[1])

    # Split the merged batch/time axes again.
    out_tmp = out_4d.reshape((Ns, Ts, Nf, Tf, Hout, Wout))

    if Tf == 1:
        # Nothing to sum along Tf; the Ts axis of the output is unchanged.
        return out_tmp.reshape((Ns, Ts, Nf, Hout, Wout))

    # Amount of zero padding needed along the time axis.
    time_mode = border_mode[0]
    if time_mode == "valid":
        Tpad = 0
    elif time_mode == "full":
        Tpad = Tf - 1
    elif time_mode == "half":
        Tpad = Tf // 2
    elif time_mode == "same":
        raise NotImplementedError()
    else:
        raise ValueError("invalid border mode", border_mode[0])

    if Tpad == 0:
        return diagonal_subtensor(out_tmp, 1, 3).sum(axis=3)

    # Embed out_tmp in a zero tensor before summing over the diagonal.
    padded = tensor.zeros(
        dtype=out_tmp.dtype,
        shape=(Ns, Ts + 2 * Tpad, Nf, Tf, Hout, Wout),
    )
    padded = tensor.set_subtensor(
        padded[:, Tpad:(Ts + Tpad), :, :, :, :], out_tmp)
    return diagonal_subtensor(padded, 1, 3).sum(axis=3)
def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
    """
    Inverse of :func:`images2neibs`: rebuild the 4d input from the 2d
    neighbourhood matrix that :func:`images2neibs` produced.

    Parameters
    ----------
    neibs : 2d tensor
        Like the one obtained by :func:`images2neibs`.
    neib_shape
        `neib_shape` that was used in :func:`images2neibs`.
    original_shape
        Original shape of the 4d tensor given to :func:`images2neibs`.
    mode : {"valid", "ignore_borders"}
        Mode that was used in :func:`images2neibs`.  With
        ``"ignore_borders"`` the part of the last two axes that does not
        fit a whole neighbourhood is filled with zeros.

    Returns
    -------
    object
        Reconstructs the input of :func:`images2neibs`, a 4d tensor of
        shape `original_shape`.

    Raises
    ------
    NotImplementedError
        If `mode` is neither ``"valid"`` nor ``"ignore_borders"``.

    Notes
    -----
    Currently, the function doesn't support tensors created with
    `neib_step` different from default value. This means that it may be
    impossible to compute the gradient of a variable gained by
    :func:`images2neibs` w.r.t. its inputs in this case, because it uses
    :func:`images2neibs` for gradient computation.

    Examples
    --------
    Example, which uses a tensor gained in example for
    :func:`images2neibs`:

    .. code-block:: python

        im_new = neibs2images(neibs, (5, 5), im_val.shape)
        # Aesara function definition
        inv_window = aesara.function([neibs], im_new)
        # Function application
        im_new_val = inv_window(neibs_val)

    .. note:: The code will output the initial image array.

    """
    neibs = tt.as_tensor_variable(neibs)
    neib_shape = tt.as_tensor_variable(neib_shape)
    original_shape = tt.as_tensor_variable(original_shape)

    patches_per_row = original_shape[-1] // neib_shape[1]
    new_neib_shape = tt.stack([patches_per_row, neib_shape[1]])
    output_2d = images2neibs(
        neibs.dimshuffle("x", "x", 0, 1), new_neib_shape, mode=mode)

    if mode == "valid":
        # TODO: we do not implement all mode with this code.
        # Add a check for the good cases.
        return output_2d.reshape(original_shape, ndim=4)

    if mode != "ignore_borders":
        raise NotImplementedError("neibs2images do not support mode=%s" % mode)

    # set_subtensor is used so that an `original_shape` whose shape cannot
    # be inferred is still accepted, while a wrongly-shaped one still
    # raises an error.
    valid_shape = original_shape
    for axis, neib_axis in ((2, 0), (3, 1)):
        step = neib_shape[neib_axis]
        valid_shape = tt.set_subtensor(
            valid_shape[axis], (valid_shape[axis] // step) * step)
    output_4d = output_2d.reshape(valid_shape, ndim=4)

    # Pad the ignored borders with zeros, one axis at a time.
    for axis in [2, 3]:
        pad_shape = list(output_4d.shape)
        pad_shape[axis] = original_shape[axis] - valid_shape[axis]
        output_4d = tt.concatenate(
            [output_4d, tt.zeros(pad_shape)], axis=axis)
    return output_4d
def test_setsubtensor1(self):
    """R-op/L-op check for writing a shared 3-vector into ``self.x[:3]``."""
    values = self.rng.uniform(size=(3,))
    shared_values = aesara.shared(np.asarray(values, aesara.config.floatX))
    result = tensor.set_subtensor(self.x[:3], shared_values)
    self.check_rop_lop(result, self.in_shape)
def test_setsubtensor2(self):
    """R-op/L-op check for writing ``self.x[:4]`` into a shared 10-vector."""
    values = self.rng.uniform(size=(10,))
    shared_values = aesara.shared(np.asarray(values, aesara.config.floatX))
    result = tensor.set_subtensor(shared_values[:4], self.x[:4])
    self.check_rop_lop(result, (10,))
def test_TransMatConjugateStep_subtensors():
    """Check that ``TransMatConjugateStep`` parses rows that mix Dirichlet and
    fixed entries, for three ways of assembling the transition matrix."""

    def build_rows():
        # Must be called inside a model context: it registers two Dirichlets.
        d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        p_0_rv = at.as_tensor([0, 0, 1])
        # NOTE(review): both zero vectors below are overwritten at once, and
        # each row is built on top of the previous row rather than on the
        # zeros -- confirm this is intended.
        p_1_rv = at.zeros(3)
        p_1_rv = at.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
        p_2_rv = at.zeros(3)
        p_2_rv = at.set_subtensor(p_1_rv[[1, 2]], d_1_rv)
        return p_0_rv, p_1_rv, p_2_rv

    def stack_as_columns(rows):
        return at.horizontal_stack(*[row[..., None] for row in rows])

    def check_parsed(step):
        assert step.row_remaps == {0: 1, 1: 2}
        exp_slices = {0: np.r_[0, 2], 1: np.r_[1, 2]}
        assert exp_slices.keys() == step.row_slices.keys()
        assert all(
            np.array_equal(step.row_slices[i], exp_slices[i])
            for i in exp_slices.keys())

    # Confirm that Dirichlet/non-Dirichlet mixed rows can be parsed
    with pm.Model():
        P_tt = at.stack(list(build_rows()))
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))
        DiscreteMarkovChain("S_t", P_rv, np.r_[1, 0, 0], shape=(10, ))
        transmat = TransMatConjugateStep(P_rv)
    check_parsed(transmat)

    # Same thing, just with some manipulations of the transition matrix
    with pm.Model():
        P_tt = stack_as_columns(build_rows())
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt.T))
        DiscreteMarkovChain("S_t", P_rv, np.r_[1, 0, 0], shape=(10, ))
        transmat = TransMatConjugateStep(P_rv)
    check_parsed(transmat)

    # Use an observed `DiscreteMarkovChain` and check the conjugate results
    with pm.Model():
        P_tt = stack_as_columns(build_rows())
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt.T))
        DiscreteMarkovChain("S_t", P_rv, np.r_[1, 0, 0], shape=(4, ),
                            observed=np.r_[0, 1, 0, 2])
        transmat = TransMatConjugateStep(P_rv)
def just_numeric_args(a, b):
    """Return ``a`` with ``a[s]`` set to ``b``; ``s`` comes from the enclosing scope."""
    target = a[s]
    return tt.set_subtensor(target, b)