def test_merge_with_weird_eq():
    # numpy arrays don't compare equal like other python objects

    # SCALAR CASE
    x = tt.constant(np.asarray(1), name="x")
    y = tt.constant(np.asarray(1), name="y")
    g = FunctionGraph([x, y], [x + y])
    MergeOptimizer().optimize(g)

    assert len(g.apply_nodes) == 1
    node = list(g.apply_nodes)[0]
    assert len(node.inputs) == 2
    assert node.inputs[0] is node.inputs[1]

    # NONSCALAR CASE
    # This was created to test TensorConstantSignature
    x = tt.constant(np.ones(5), name="x")
    y = tt.constant(np.ones(5), name="y")
    g = FunctionGraph([x, y], [x + y])
    MergeOptimizer().optimize(g)

    assert len(g.apply_nodes) == 1
    node = list(g.apply_nodes)[0]
    assert len(node.inputs) == 2
    assert node.inputs[0] is node.inputs[1]
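# NumPy sketch of the comparison issue exercised above: `==` on arrays is
# elementwise, and `bool()` on a multi-element result raises, which is why
# constant merging can't rely on plain `==` and needs something like
# `TensorConstantSignature`.
import numpy as np

a, b = np.ones(5), np.ones(5)
assert not isinstance(a == b, bool)  # elementwise array, not a bool
try:
    bool(a == b)
except ValueError:
    pass  # "truth value of an array ... is ambiguous"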
def make_node(self, rng, size, dtype, *dist_params):
    """Create a random variable node.

    XXX: Unnamed/non-keyword arguments are considered distribution
    parameters!  If you want to set `size`, `rng`, and/or `name`, use their
    keywords.

    Parameters
    ----------
    rng: RandomStateType
        Existing Aesara `RandomState` object to be used.  A new one is
        created if `None`.
    size: int or Sequence
        NumPy-like size of the output (i.e. replications).
    dtype: str
        The dtype of the sampled output.  If the value ``"floatX"`` is
        given, then ``dtype`` is set to ``aesara.config.floatX``.  This
        value is only used when `self.dtype` isn't set.
    dist_params: list
        Distribution parameters.

    Returns
    -------
    out: `Apply`
        A node with inputs `(rng, size, dtype) + dist_args` and outputs
        `(rng_var, out_var)`.

    """
    size = normalize_size_param(size)

    dist_params = tuple(
        as_tensor_variable(p) if not isinstance(p, Variable) else p
        for p in dist_params
    )

    if rng is None:
        rng = aesara.shared(np.random.RandomState())
    elif not isinstance(rng.type, RandomStateType):
        raise TypeError("The type of rng should be an instance of RandomStateType")

    bcast = self.compute_bcast(dist_params, size)

    dtype = self.dtype or dtype

    if dtype == "floatX":
        dtype = config.floatX
    elif dtype is None or (isinstance(dtype, str) and dtype not in all_dtypes):
        raise TypeError("dtype is unspecified")

    if isinstance(dtype, str):
        dtype_idx = constant(all_dtypes.index(dtype), dtype="int64")
    else:
        dtype_idx = constant(dtype, dtype="int64")

    dtype = all_dtypes[dtype_idx.data]

    outtype = TensorType(dtype=dtype, broadcastable=bcast)
    out_var = outtype()
    inputs = (rng, size, dtype_idx) + dist_params
    outputs = (rng.type(), out_var)

    return Apply(self, inputs, outputs)
def make_node(self, rng, size, dtype, *dist_params):
    """Create a random variable node.

    Parameters
    ----------
    rng: RandomGeneratorType or RandomStateType
        Existing Aesara `Generator` or `RandomState` object to be used.
        A new one is created if `None`.
    size: int or Sequence
        NumPy-like size parameter.
    dtype: str
        The dtype of the sampled output.  If the value ``"floatX"`` is
        given, then `dtype` is set to ``aesara.config.floatX``.  This
        value is only used when ``self.dtype`` isn't set.
    dist_params: list
        Distribution parameters.

    Returns
    -------
    out: Apply
        A node with inputs ``(rng, size, dtype) + dist_args`` and outputs
        ``(rng_var, out_var)``.

    """
    size = normalize_size_param(size)

    dist_params = tuple(
        as_tensor_variable(p) if not isinstance(p, Variable) else p
        for p in dist_params
    )

    if rng is None:
        rng = aesara.shared(np.random.default_rng())
    elif not isinstance(rng.type, RandomType):
        raise TypeError(
            "The type of rng should be an instance of either RandomGeneratorType or RandomStateType"
        )

    shape = self._infer_shape(size, dist_params)
    _, bcast = infer_broadcastable(shape)

    dtype = self.dtype or dtype

    if dtype == "floatX":
        dtype = config.floatX
    elif dtype is None or (isinstance(dtype, str) and dtype not in all_dtypes):
        raise TypeError("dtype is unspecified")

    if isinstance(dtype, str):
        dtype_idx = constant(all_dtypes.index(dtype), dtype="int64")
    else:
        dtype_idx = constant(dtype, dtype="int64")

    dtype = all_dtypes[dtype_idx.data]

    outtype = TensorType(dtype=dtype, shape=bcast)
    out_var = outtype()
    inputs = (rng, size, dtype_idx) + dist_params
    outputs = (rng.type(), out_var)

    return Apply(self, inputs, outputs)
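# A minimal usage sketch, assuming the `aesara.tensor.random` module API:
# `make_node` is normally reached by calling a `RandomVariable` `Op`, and
# the resulting `Apply` node follows the input/output layout documented
# above.
import aesara.tensor as at

x = at.random.normal(0, 1, size=(2, 3))
node = x.owner
rng, size, dtype_idx, loc, scale = node.inputs  # (rng, size, dtype) + dist_params
assert x is node.outputs[1]                     # (rng_var, out_var)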
def grad(self, inputs, grads):
    x, scale, bias, est_mean, est_var, epsilon = inputs
    dy = grads[0]
    axes = self.axes
    if min(axes) < 0 or max(axes) >= x.ndim:
        raise ValueError(
            f"axes should be less than ndim (<{x.ndim}), but {axes} given"
        )

    scale, bias, est_mean, est_var = (
        aet.addbroadcast(t, *axes) for t in (scale, bias, est_mean, est_var)
    )

    # define helper expressions
    est_var_eps = est_var + epsilon
    est_std = sqrt(est_var_eps)
    two = aet.constant(2.0)

    # define and return gradients
    dx = dy * (scale / est_std)
    dscale = (dy * (x - est_mean)).sum(axes, keepdims=True) / est_std
    dbias = dy.sum(axes, keepdims=True)
    dmean = -dy.sum(axes, keepdims=True) * (scale / est_std)
    dvar = -(dy * (x - est_mean)).sum(axes, keepdims=True) * (
        scale / (two * est_var_eps * est_std)
    )
    return [dx, dscale, dbias, dmean, dvar, aesara.gradient.DisconnectedType()()]
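# Numeric sanity check (NumPy) of the `dx` expression above: for
# inference-mode batch norm, y = scale * (x - mean) / sqrt(var + eps) + bias,
# so dy/dx == scale / sqrt(var + eps); a central finite difference agrees.
import numpy as np

scale, bias, mean, var, eps = 2.0, 0.1, 0.0, 1.0, 1e-4
f = lambda x: scale * (x - mean) / np.sqrt(var + eps) + bias
h = 1e-6
fd = (f(0.3 + h) - f(0.3 - h)) / (2 * h)
assert np.isclose(fd, scale / np.sqrt(var + eps))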
def test_reshape(self):
    new_shape = constant(
        np.asarray([self.mat_in_shape[0] * self.mat_in_shape[1]], dtype="int64")
    )
    self.check_mat_rop_lop(
        self.mx.reshape(new_shape),
        (self.mat_in_shape[0] * self.mat_in_shape[1],),
    )
def test_dense_types():
    x = matrix()
    assert isinstance(x, DenseTensorVariable)
    assert not isinstance(x, DenseTensorConstant)

    x = constant(1)
    assert not isinstance(x, DenseTensorVariable)
    assert isinstance(x, DenseTensorConstant)
def hard_sigmoid(x):
    """An approximation of sigmoid.

    Coarser but faster than `ultra_fast_sigmoid`.

    Piecewise in three parts: 0, a scaled linear segment, and 1.

    Removing the slope and shift does not make it faster.

    """
    # Use the same dtype as determined by "upgrade_to_float",
    # and perform the computation in that dtype.
    out_dtype = aes.upgrade_to_float(aes.Scalar(dtype=x.dtype))[0].dtype
    slope = constant(0.2, dtype=out_dtype)
    shift = constant(0.5, dtype=out_dtype)
    x = (x * slope) + shift
    x = clip(x, 0, 1)
    return x
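# NumPy sketch of the same piecewise approximation: clip(0.2 * x + 0.5, 0, 1)
# saturates at 0 for x <= -2.5 and at 1 for x >= 2.5.
import numpy as np

xs = np.array([-3.0, 0.0, 3.0])
ys = np.clip(0.2 * xs + 0.5, 0.0, 1.0)
assert np.allclose(ys, [0.0, 0.5, 1.0])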
def slice_ind_dims(p, ps, n):
    shape = tuple(ps)

    if n == 0:
        return (p, shape)

    ind_slice = (slice(None),) * (p.ndim - n) + (0,) * n
    ind_shape = [
        s if b is False else constant(1, "int64")
        for s, b in zip(shape[:-n], p.broadcastable[:-n])
    ]
    return (
        p[ind_slice],
        ind_shape,
    )
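# Pure-NumPy sketch of the slice built above: for a parameter with one
# support dimension (n == 1), `(slice(None),) * (p.ndim - n) + (0,) * n`
# keeps the independent-variate dimensions and drops the support dimension.
import numpy as np

p = np.zeros((4, 5, 3))  # two independent dims plus one support dim
ind_slice = (slice(None),) * (p.ndim - 1) + (0,) * 1
assert p[ind_slice].shape == (4, 5)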
def normalize_size_param(size):
    """Create an Aesara value for a ``RandomVariable`` ``size`` parameter."""
    if size is None:
        size = constant([], dtype="int64")
    elif isinstance(size, int):
        size = as_tensor_variable([size], ndim=1)
    elif not isinstance(size, (np.ndarray, Variable, Sequence)):
        raise TypeError(
            "Parameter size must be None, an integer, or a sequence with integers."
        )
    else:
        size = cast(as_tensor_variable(size, ndim=1), "int64")

    assert size.dtype in int_dtypes

    return size
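# A minimal usage sketch, assuming this module's imports are in scope:
# every accepted form is normalized to a 1-D int64 vector.
s0 = normalize_size_param(None)    # empty int64 vector (no replications)
s1 = normalize_size_param(3)       # int64 vector [3]
s2 = normalize_size_param((2, 3))  # int64 vector [2, 3]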
def make_node(self, x, shape):
    if not isinstance(x, Variable):
        x = aet.as_tensor_variable(x)

    if shape == () or shape == []:
        tshape = aet.constant([], dtype="int64")
    else:
        tshape = aet.as_tensor_variable(shape, ndim=1)
        if tshape.dtype not in aesara.tensor.type.integer_dtypes:
            raise AssertionError(
                f"The `shape` must be an integer type. Got {tshape.dtype} instead."
            )

    if isinstance(tshape, TensorConstant) and tshape.data.size != x.ndim:
        ndim = len(tshape.data)
        raise AssertionError(
            f"Input `x` is {x.ndim}-dimensional and will never match a {ndim}-dimensional shape."
        )

    return Apply(self, [x, tshape], [x.type()])
def test_grad(self):
    a = np.asarray(self.rng.randn(5, 5), dtype=config.floatX)

    x = matrix("x")

    expressions_gradients = [
        (x * zero_grad(x), x),
        (x * zero_grad(exp(x)), exp(x)),
        (zero_grad(x), aet.constant(0.0)),
        (x**2 * zero_grad(x), 2 * x**2),
    ]

    for expr, expr_grad in expressions_gradients:
        g = grad(expr.sum(), x)
        # gradient according to aesara
        f = aesara.function([x], g, on_unused_input="ignore")
        # desired gradient
        f2 = aesara.function([x], expr_grad, on_unused_input="ignore")

        assert np.allclose(f(a), f2(a))
def test_multiple_out_crash(self):
    # This test failed up to commit 2faeb62c38
    p0 = self.shared(np.asarray(np.random.random([4, 8]), dtype=self.dtype))
    p1 = self.shared(np.asarray(np.random.random(8), dtype=self.dtype))
    p2 = self.shared(np.asarray(np.random.random([8, 3]), dtype=self.dtype))
    p3 = self.shared(np.asarray(np.random.random(3), dtype=self.dtype))
    p = [p0, p1, p2, p3]

    # in my code these vars are the result of applying scan
    ften0 = tensor3("ft0", dtype=self.dtype)
    fmat1 = matrix("fm1", dtype=self.dtype)
    ften2 = tensor3("ft2", dtype=self.dtype)
    fmat3 = matrix("fm3", dtype=self.dtype)

    # then I keep only the last iteration
    fsub0 = ften0[-1]
    fsub1 = fmat1[-1]
    fsub2 = ften2[-1]
    fsub3 = fmat3[-1]

    fsub = [fsub0, fsub1, fsub2, fsub3]

    acc = at.constant(1, "int8") >= 0

    new_positions = ifelse(acc, fsub, p)

    new_updates = [(p[0], new_positions[0])]

    f = function(
        [ften0, fmat1, ften2, fmat3], [], updates=new_updates, mode=self.mode
    )
    self.assertFunctionContains1(f, self.get_ifelse(4))

    i1 = np.asarray(np.random.random([19, 4, 8]), dtype=self.dtype)
    i2 = np.asarray(np.random.random([19, 8]), dtype=self.dtype)
    i3 = np.asarray(np.random.random([19, 8, 3]), dtype=self.dtype)
    i4 = np.asarray(np.random.random([19, 3]), dtype=self.dtype)

    f(i1, i2, i3, i4)
def normalize_size_param(size):
    """Create an Aesara value for a ``RandomVariable`` ``size`` parameter."""
    if size is None:
        size = constant([], dtype="int64")
    elif isinstance(size, int):
        size = as_tensor_variable([size], ndim=1)
    elif not isinstance(size, (np.ndarray, Variable, Sequence)):
        raise TypeError(
            "Parameter size must be None, an integer, or a sequence with integers."
        )
    else:
        size = cast(as_tensor_variable(size, ndim=1), "int64")

        if not isinstance(size, Constant):
            # This should help ensure that the length of non-constant `size`s
            # will be available after certain types of cloning (e.g. the kind
            # `Scan` performs)
            size = specify_shape(size, (get_vector_length(size),))

    assert size.dtype in int_dtypes

    return size
def local_dimshuffle_rv_lift(fgraph, node):
    """Lift `DimShuffle`s through `RandomVariable` `Op`s.

    For example, ``normal(mu, std).T == normal(mu.T, std.T)``.

    The basic idea behind this optimization is that we need to separate the
    `DimShuffle`-ing into independent `DimShuffle`s that each occur in two
    distinct sub-spaces: the parameters and ``size`` (i.e. replications)
    sub-spaces.

    If a `DimShuffle` exchanges dimensions across those two sub-spaces, then we
    don't do anything.

    Otherwise, if the `DimShuffle` only exchanges dimensions within each of
    those sub-spaces, we can break it apart and apply the parameter-space
    `DimShuffle` to the `RandomVariable`'s distribution parameters, and then
    apply the replications-space `DimShuffle` to the `RandomVariable`'s
    ``size`` tuple.  The latter is a particularly simple rearranging of a
    tuple, but the former requires a little more work.
    """
    ds_op = node.op

    if not isinstance(ds_op, DimShuffle):
        return False

    base_rv = node.inputs[0]
    rv_node = base_rv.owner

    if not (
        rv_node
        and isinstance(rv_node.op, RandomVariable)
        and rv_node.op.ndim_supp == 0
    ):
        return False

    # If no one else is using the underlying `RandomVariable`, then we can
    # do this; otherwise, the graph would be internally inconsistent.
    if not all(
        (n == node or isinstance(n.op, Shape)) for n, i in fgraph.clients[base_rv]
    ):
        return False

    rv_op = rv_node.op
    rng, size, dtype, *dist_params = rv_node.inputs

    # We need to know the dimensions that were *not* added by the `size`
    # parameter (i.e. the dimensions corresponding to independent variates
    # with different parameter values)
    num_ind_dims = None
    if len(dist_params) == 1:
        num_ind_dims = dist_params[0].ndim
    else:
        # When there is more than one distribution parameter, assume that all
        # of them will broadcast to the maximum number of dimensions
        num_ind_dims = max(d.ndim for d in dist_params)

    # If the indices in `ds_new_order` are entirely within the replication
    # indices group or the independent variates indices group, then we can
    # apply this optimization.

    ds_new_order = ds_op.new_order
    # Create a map from old index order to new/`DimShuffle`d index order
    dim_orders = [(n, d) for n, d in enumerate(ds_new_order) if isinstance(d, int)]

    # Find the index at which the replications/independents split occurs
    reps_ind_split_idx = len(dim_orders) - (num_ind_dims + rv_op.ndim_supp)

    ds_reps_new_dims = dim_orders[:reps_ind_split_idx]
    ds_ind_new_dims = dim_orders[reps_ind_split_idx:]
    ds_only_in_ind = ds_ind_new_dims and all(
        d >= reps_ind_split_idx for n, d in ds_ind_new_dims
    )

    if ds_only_in_ind:

        # Update the `size` array to reflect the `DimShuffle`d dimensions,
        # since the trailing dimensions in `size` represent the independent
        # variates dimensions (for univariate distributions, at least)
        new_size = (
            [constant(1, dtype="int64") if o == "x" else size[o] for o in ds_new_order]
            if get_vector_length(size) > 0
            else size
        )

        # Compute the new axes parameter(s) for the `DimShuffle` that will be
        # applied to the `RandomVariable` parameters (they need to be offset)
        rv_params_new_order = [
            d - reps_ind_split_idx if isinstance(d, int) else d
            for d in ds_new_order[ds_ind_new_dims[0][0] :]
        ]

        # Lift the `DimShuffle`s into the parameters
        # NOTE: The parameters might not be broadcasted against each other, so
        # we can only apply the parts of the `DimShuffle` that are relevant.
        new_dist_params = []
        for d in dist_params:
            if d.ndim < len(ds_ind_new_dims):
                _rv_params_new_order = [
                    o
                    for o in rv_params_new_order
                    if (isinstance(o, int) and o < d.ndim) or o == "x"
                ]
            else:
                _rv_params_new_order = rv_params_new_order

            new_dist_params.append(
                type(ds_op)(d.type.broadcastable, _rv_params_new_order)(d)
            )

        new_node = rv_op.make_node(rng, new_size, dtype, *new_dist_params)

        if config.compute_test_value != "off":
            compute_test_value(new_node)

        return [new_node.outputs[1]]

    ds_only_in_reps = ds_reps_new_dims and all(
        d < reps_ind_split_idx for n, d in ds_reps_new_dims
    )

    if ds_only_in_reps:

        # Update the `size` array to reflect the `DimShuffle`d dimensions.
        # There should be no need to `DimShuffle` now.
        new_size = [
            constant(1, dtype="int64") if o == "x" else size[o] for o in ds_new_order
        ]

        new_node = rv_op.make_node(rng, new_size, dtype, *dist_params)

        if config.compute_test_value != "off":
            compute_test_value(new_node)

        return [new_node.outputs[1]]

    return False
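# Pure-Python sketch of the index bookkeeping above: replication (`size`)
# dimensions sit in front, independent-variate (parameter) dimensions behind,
# and a `DimShuffle` can be lifted only if it stays within one group.
ds_new_order = (0, 2, 1)  # e.g. swap the last two dimensions
num_ind_dims, ndim_supp = 2, 0
dim_orders = [(n, d) for n, d in enumerate(ds_new_order) if isinstance(d, int)]
reps_ind_split_idx = len(dim_orders) - (num_ind_dims + ndim_supp)
ds_ind_new_dims = dim_orders[reps_ind_split_idx:]
# Both shuffled indices (2 and 1) are >= the split index 1, so the shuffle
# acts only on the parameter sub-space and can be pushed onto the parameters.
assert all(d >= reps_ind_split_idx for _, d in ds_ind_new_dims)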
def _infer_shape(
    self,
    size: Tuple[TensorVariable],
    dist_params: List[TensorVariable],
    param_shapes: Optional[List[Tuple[TensorVariable]]] = None,
) -> Tuple[ScalarVariable]:
    """Compute the output shape given the size and distribution parameters.

    Parameters
    ----------
    size
        The size parameter specified for this `RandomVariable`.
    dist_params
        The symbolic parameters for this `RandomVariable`'s distribution.
    param_shapes
        The shapes of the `dist_params` as given by a `ShapeFeature` via
        `Op.infer_shape`'s `input_shapes` argument.  This parameter's
        values are essentially more accurate versions of
        ``[d.shape for d in dist_params]``.

    """
    size_len = get_vector_length(size)

    if self.ndim_supp == 0 and size_len > 0:
        # In this case, we have a univariate distribution with a non-empty
        # `size` parameter, which means that the `size` parameter
        # completely determines the shape of the random variable.  More
        # importantly, the `size` parameter may be the only correct source
        # of information for the output shape, in that we would be misled
        # by the `dist_params` if we tried to infer the relevant parts of
        # the output shape from those.
        return size

    # Broadcast the parameters
    param_shapes = params_broadcast_shapes(
        param_shapes or [shape_tuple(p) for p in dist_params], self.ndims_params
    )

    def slice_ind_dims(p, ps, n):
        shape = tuple(ps)

        if n == 0:
            return (p, shape)

        ind_slice = (slice(None),) * (p.ndim - n) + (0,) * n
        ind_shape = [
            s if b is False else constant(1, "int64")
            for s, b in zip(shape[:-n], p.broadcastable[:-n])
        ]
        return (
            p[ind_slice],
            ind_shape,
        )

    # These are versions of our actual parameters with the anticipated
    # dimensions (i.e. support dimensions) removed so that only the
    # independent variate dimensions are left.
    params_ind_slice = tuple(
        slice_ind_dims(p, ps, n)
        for p, ps, n in zip(dist_params, param_shapes, self.ndims_params)
    )

    if len(params_ind_slice) == 1:
        ind_param, ind_shape = params_ind_slice[0]
        ndim_ind = len(ind_shape)
        shape_ind = ind_shape
    elif len(params_ind_slice) > 1:
        # If there are multiple parameters, the dimensions of their
        # independent variates should broadcast together.
        p_slices, p_shapes = zip(*params_ind_slice)
        shape_ind = aesara.tensor.extra_ops.broadcast_shape_iter(
            p_shapes, arrays_are_shapes=True
        )
        ndim_ind = len(shape_ind)
    else:
        ndim_ind = 0

    if self.ndim_supp == 0:
        shape_supp = tuple()
        shape_reps = tuple(size)

        if ndim_ind > 0:
            shape_reps = shape_reps[:-ndim_ind]

        ndim_reps = len(shape_reps)
    else:
        shape_supp = self._shape_from_params(
            dist_params,
            param_shapes=param_shapes,
        )

        ndim_reps = size_len
        shape_reps = size

    ndim_shape = self.ndim_supp + ndim_ind + ndim_reps

    if ndim_shape == 0:
        shape = constant([], dtype="int64")
    else:
        shape = tuple(shape_reps) + tuple(shape_ind) + tuple(shape_supp)

    # if shape is None:
    #     raise ShapeError()

    return shape
def _infer_shape(
    self,
    size: TensorVariable,
    dist_params: Sequence[TensorVariable],
    param_shapes: Optional[Sequence[Tuple[Variable, ...]]] = None,
) -> Union[TensorVariable, Tuple[ScalarVariable, ...]]:
    """Compute the output shape given the size and distribution parameters.

    Parameters
    ----------
    size
        The size parameter specified for this `RandomVariable`.
    dist_params
        The symbolic parameters for this `RandomVariable`'s distribution.
    param_shapes
        The shapes of the `dist_params` as given by a `ShapeFeature` via
        `Op.infer_shape`'s `input_shapes` argument.  This parameter's
        values are essentially more accurate versions of
        ``[d.shape for d in dist_params]``.

    """
    size_len = get_vector_length(size)

    if size_len > 0:
        if self.ndim_supp == 0:
            return size
        else:
            supp_shape = self._supp_shape_from_params(
                dist_params, param_shapes=param_shapes
            )
            return tuple(size) + tuple(supp_shape)

    # Broadcast the parameters
    param_shapes = params_broadcast_shapes(
        param_shapes or [shape_tuple(p) for p in dist_params], self.ndims_params
    )

    def slice_ind_dims(p, ps, n):
        shape = tuple(ps)

        if n == 0:
            return (p, shape)

        ind_slice = (slice(None),) * (p.ndim - n) + (0,) * n
        ind_shape = [
            s if b is False else constant(1, "int64")
            for s, b in zip(shape[:-n], p.broadcastable[:-n])
        ]
        return (
            p[ind_slice],
            ind_shape,
        )

    # These are versions of our actual parameters with the anticipated
    # dimensions (i.e. support dimensions) removed so that only the
    # independent variate dimensions are left.
    params_ind_slice = tuple(
        slice_ind_dims(p, ps, n)
        for p, ps, n in zip(dist_params, param_shapes, self.ndims_params)
    )

    if len(params_ind_slice) == 1:
        _, shape_ind = params_ind_slice[0]
    elif len(params_ind_slice) > 1:
        # If there are multiple parameters, the dimensions of their
        # independent variates should broadcast together.
        p_slices, p_shapes = zip(*params_ind_slice)
        shape_ind = aesara.tensor.extra_ops.broadcast_shape_iter(
            p_shapes, arrays_are_shapes=True
        )
    else:
        # Distribution has no parameters
        shape_ind = ()

    if self.ndim_supp == 0:
        shape_supp = ()
    else:
        shape_supp = self._supp_shape_from_params(
            dist_params,
            param_shapes=param_shapes,
        )

    shape = tuple(shape_ind) + tuple(shape_supp)
    if not shape:
        shape = constant([], dtype="int64")

    return shape
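# Shape bookkeeping sketch for the `size_len > 0` branch above: a
# hypothetical multivariate draw with `ndim_supp == 1`, support shape (3,),
# and `size == (2,)` yields output shape `tuple(size) + tuple(supp_shape)`.
size, supp_shape = (2,), (3,)
assert tuple(size) + tuple(supp_shape) == (2, 3)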
def _run(self, num_features, num_timesteps, batch_size, mode):
    # determine shapes of inputs and targets depending on the batch size
    if batch_size == 1:
        inputs_size = (num_timesteps, num_features)
        targets_size = (num_timesteps, 1)
    else:
        inputs_size = (num_timesteps, batch_size, num_features)
        targets_size = (num_timesteps, batch_size, 1)

    # make inputs and targets shared variables
    inputs = aesara.shared(
        self.rng.uniform(size=inputs_size).astype(config.floatX), borrow=True
    )
    targets = aesara.shared(
        self.rng.uniform(size=targets_size).astype(config.floatX), borrow=True
    )

    # create symbolic inputs and targets variables
    if batch_size == 1:
        x = matrix("inputs")
        t = matrix("targets")
    else:
        x = tensor3("inputs")
        t = tensor3("targets")
    x.tag.test_value = inputs.get_value(borrow=True)
    t.tag.test_value = targets.get_value(borrow=True)

    # create a set of parameters for a simple RNN
    W_xh = aesara.shared(
        (0.01 * self.rng.uniform(size=(num_features, 10))).astype(config.floatX),
        borrow=True,
    )
    W_hh = aesara.shared(
        (0.01 * self.rng.uniform(size=(10, 10))).astype(config.floatX), borrow=True
    )
    W_hy = aesara.shared(
        (0.01 * self.rng.uniform(size=(10, 1))).astype(config.floatX), borrow=True
    )
    b_h = aesara.shared(np.zeros(10).astype(config.floatX), borrow=True)
    b_y = aesara.shared(np.zeros(1).astype(config.floatX), borrow=True)

    params = [W_xh, W_hh, W_hy, b_h, b_y]

    # recurrent function
    def step(x_t, h_tm1):
        h = tanh(dot(h_tm1, W_hh) + dot(x_t, W_xh) + b_h)
        return h

    # build recurrent graph
    if batch_size == 1:
        h_0 = aet.alloc(0.0, 10).astype(config.floatX)
    else:
        h_0 = aet.alloc(0.0, batch_size, 10).astype(config.floatX)
    h, updates = aesara.scan(step, sequences=[x], outputs_info=[h_0])

    # network output
    y = dot(h, W_hy) + b_y

    # Create Gauss-Newton-Matrix object. Not really of any use here, but I
    # need it for Hessian-Free optimization.
    gn = GaussNewtonMatrix(y)

    # compute MSE
    cost = ((t - y) ** 2).sum(axis=1).mean()

    # Compute the cost at some other point in the parameter
    # space. Not really of any use here, but this is how I do it
    # during certain iterations of CG in the HF algorithm. There,
    # it's in fact `pi + current update proposal`. For simplicity,
    # I just multiply by 2 here.
    cost_ = aesara.clone_replace(cost, replace={pi: 2 * pi for pi in params})

    # Compute Gauss-Newton-Matrix times some vector `v` which is `p` in CG,
    # but for simplicity, I just take the parameters vector because it's
    # already there.
    Gv = gn(v=params, cost=cost, parameters=params, damp=aet.constant(1.0))

    # compile Aesara function
    f = aesara.function(
        [], [cost_] + Gv, givens={x: inputs, t: targets}, mode=mode
    )

    # execute
    f()
def test_empty_shp(self):
    const = constant([1]).reshape(())
    f = function([], const)
    assert f().shape == ()
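# NumPy analogue of the behavior under test: reshaping a one-element
# array with the empty shape `()` yields a 0-d array.
import numpy as np

assert np.asarray([1]).reshape(()).shape == ()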
def local_dimshuffle_subtensor(node):
    """If a `Subtensor` is inside a `DimShuffle` that only drops
    broadcastable dimensions, scrap the `DimShuffle` and index the
    `Subtensor` with 0.

    x[i:j, :, k:l].dimshuffle(0, 2) =>
        x[i:j, 0, k:l] if x.broadcastable == (False, True, False)

    """
    if isinstance(node.op, DimShuffle) and node.inputs[0].owner:
        # the dimshuffle can only drop dimensions (it cannot reshape nor
        # add 'x')
        if "x" in node.op.new_order:
            return False
        new_order = node.op.new_order
        # new order could be empty
        # Verify that we don't change the order of dimensions.
        if len(new_order) > 1:
            past_dim = new_order[0]
            for dim in new_order[1:]:
                if not dim > past_dim:
                    return False
                else:
                    past_dim = dim

        input_ = node.inputs[0]
        if isinstance(input_.owner.op, Subtensor):
            # the arguments missing from the dimshuffle must be dims
            # that are broadcastable
            broadcastable = input_.broadcastable

            missing_dims = list(range(input_.ndim))
            for dim in new_order:
                missing_dims.remove(dim)

            if not all(broadcastable[i] for i in missing_dims):
                return False

            # create a new idx_list for a new Subtensor object
            # have to loop on idx_list and inputs
            # inputs has the length of sum of non-None elements of idx_list
            # (check in slice!).
            # len(missing_dims) can be < len(idx_list); this happens if the
            # tensor was indexed such as x[scalar, :, :], check that as well
            new_idx_list = list(input_.owner.op.idx_list)
            new_inputs = [input_.owner.inputs[0]]
            zero = tt.constant(0)
            slice_attr_list = ["start", "stop", "step"]
            j = 0
            slice_i = -1
            subtensor_removed_dims = 0
            for i, idx in enumerate(input_.owner.op.idx_list):
                if isinstance(idx, slice):
                    past_j = j
                    slice_i += 1
                    for slice_attr in slice_attr_list:
                        if getattr(idx, slice_attr) is not None:
                            new_inputs += [input_.owner.inputs[1 + j]]
                            j += 1
                    # if past_j == j, this is a slice(None, None, None);
                    # that's where we want to index with 0 if it is also at
                    # the same spot as a missing dim
                    if past_j == j and slice_i in missing_dims:
                        new_idx_list[i] = zero
                        new_inputs += [zero]
                else:
                    new_inputs += [input_.owner.inputs[1 + j]]
                    j += 1
                    subtensor_removed_dims += 1

            # Verify the trailing dimensions the subtensor didn't look at.
            for idx in range(len(input_.owner.op.idx_list), new_inputs[0].ndim):
                if (idx - subtensor_removed_dims) in missing_dims:
                    while len(new_idx_list) < idx:
                        new_idx_list.append(slice(None))

                    new_idx_list.append(zero)
                    new_inputs.append(zero)

            return [Subtensor(new_idx_list)(*new_inputs)]
    return False
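# NumPy analogue of the rewrite's docstring example: after slicing, dropping
# a length-1 (broadcastable) axis is the same as indexing it with 0.
import numpy as np

x = np.arange(24).reshape(4, 1, 6)  # middle axis is broadcastable
a = x[1:3, :, 2:5].squeeze(axis=1)  # the dimshuffle(0, 2) analogue
b = x[1:3, 0, 2:5]                  # the rewritten, indexed form
assert np.array_equal(a, b)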