def test_merge_with_weird_eq():
    """numpy arrays don't compare equal like other python objects"""

    # SCALAR CASE
    x = T.constant(np.asarray(1), name='x')
    y = T.constant(np.asarray(1), name='y')
    g = Env([x, y], [x + y])
    MergeOptimizer().optimize(g)

    assert len(g.apply_nodes) == 1
    node = list(g.apply_nodes)[0]
    assert len(node.inputs) == 2
    assert node.inputs[0] is node.inputs[1]

    # NONSCALAR CASE
    # This was created to test TensorConstantSignature
    x = T.constant(np.ones(5), name='x')
    y = T.constant(np.ones(5), name='y')
    g = Env([x, y], [x + y])
    MergeOptimizer().optimize(g)

    assert len(g.apply_nodes) == 1
    node = list(g.apply_nodes)[0]
    assert len(node.inputs) == 2
    assert node.inputs[0] is node.inputs[1]
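# A minimal sketch (not part of the test suite) of the behaviour the docstring
# refers to: `==` on numpy arrays is elementwise, so constants cannot be merged
# by plain equality and need a signature-based comparison instead.
def _demo_numpy_eq_is_elementwise():
    import numpy as np

    a, b = np.ones(5), np.ones(5)
    print(a == b)                # array([ True,  True,  True,  True,  True])
    print(np.array_equal(a, b))  # True -- a single boolean, usable for merging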
def grad(self, inputs, grads):
    x, scale, bias, est_mean, est_var, epsilon = inputs
    dy = grads[0]
    axes = self.axes
    if min(axes) < 0 or max(axes) >= x.ndim:
        raise ValueError(
            f"axes should be less than ndim (<{x.ndim}), but {axes} given"
        )

    scale, bias, est_mean, est_var = (
        tt.addbroadcast(t, *axes) for t in (scale, bias, est_mean, est_var)
    )

    # define helper expressions
    est_var_eps = est_var + epsilon
    est_std = tt.sqrt(est_var_eps)
    two = tt.constant(2.0)

    # define and return gradients
    dx = dy * (scale / est_std)
    dscale = (dy * (x - est_mean)).sum(axes, keepdims=True) / est_std
    dbias = dy.sum(axes, keepdims=True)
    dmean = -dy.sum(axes, keepdims=True) * (scale / est_std)
    dvar = -(dy * (x - est_mean)).sum(axes, keepdims=True) * (
        scale / (two * est_var_eps * est_std)
    )

    return [dx, dscale, dbias, dmean, dvar, theano.gradient.DisconnectedType()()]
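# A NumPy finite-difference sketch of the `dx` term above, assuming the
# inference-mode output is y = (x - est_mean) / sqrt(est_var + eps) * scale + bias
# with fixed statistics; the names below are illustrative, not the library API.
def _demo_check_dx_term():
    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.normal(size=(4, 3))
    dy = rng.normal(size=(4, 3))
    scale = rng.normal(size=3)
    mean, var, eps = rng.normal(size=3), rng.random(3) + 0.1, 1e-4

    def loss(x_):
        # bias drops out of the gradient, so it is omitted here
        return np.sum(dy * ((x_ - mean) / np.sqrt(var + eps) * scale))

    dx = dy * scale / np.sqrt(var + eps)  # same formula as the symbolic dx
    h = 1e-6
    xp = x.copy()
    xp[0, 0] += h
    numeric = (loss(xp) - loss(x)) / h
    print(np.isclose(numeric, dx[0, 0], rtol=1e-4))  # True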
def hard_sigmoid(x):
    """An approximation of sigmoid.

    More approximate and faster than ultra_fast_sigmoid.

    Approx in 3 parts: 0, scaled linear, 1.

    Removing the slope and shift does not make it faster.

    """
    # Use the same dtype as determined by "upgrade_to_float",
    # and perform computation in that dtype.
    out_dtype = scalar.upgrade_to_float(scalar.Scalar(dtype=x.dtype))[0].dtype
    slope = tensor.constant(0.2, dtype=out_dtype)
    shift = tensor.constant(0.5, dtype=out_dtype)
    x = (x * slope) + shift
    x = tensor.clip(x, 0, 1)
    return x
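# A plain-NumPy sketch of the same three-part approximation (0, scaled linear, 1);
# `hard_sigmoid_reference` is an illustrative name, not part of the library.
def hard_sigmoid_reference(x):
    import numpy as np

    # 0 for x <= -2.5, 0.2 * x + 0.5 in between, 1 for x >= 2.5
    return np.clip(0.2 * np.asarray(x) + 0.5, 0.0, 1.0)

# hard_sigmoid_reference([-5.0, -2.5, 0.0, 2.5, 5.0]) -> [0., 0., 0.5, 1., 1.]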
def slice_ind_dims(p, ps, n):
    shape = tuple(ps)

    if n == 0:
        return (p, shape)

    ind_slice = (slice(None),) * (p.ndim - n) + (0,) * n
    ind_shape = [
        s if b is False else constant(1, "int64")
        for s, b in zip(shape[:-n], p.broadcastable[:-n])
    ]
    return (
        p[ind_slice],
        ind_shape,
    )
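# Sketch of what this helper computes, in plain NumPy terms (illustrative only):
# the last `n` (support) dimensions are indexed at 0 so that only the
# independent variate dimensions of the parameter remain.
def _demo_slice_ind_dims():
    import numpy as np

    p = np.arange(2 * 3 * 4).reshape(2, 3, 4)
    n = 1  # one support dimension
    sliced = p[(slice(None),) * (p.ndim - n) + (0,) * n]
    print(sliced.shape)  # (2, 3): the support dimension has been dropped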
def local_0_dot_x(node):
    if not isinstance(node.op, T.Dot):
        return False

    x = node.inputs[0]
    y = node.inputs[1]
    replace = False
    try:
        if get_scalar_constant_value(x) == 0:
            replace = True
    except NotScalarConstantError:
        pass

    try:
        if get_scalar_constant_value(y) == 0:
            replace = True
    except NotScalarConstantError:
        pass

    if replace:
        constant_zero = T.constant(0, dtype=node.outputs[0].type.dtype)
        if x.ndim == 2 and y.ndim == 2:
            constant_zero = assert_(constant_zero, T.eq(x.shape[1], y.shape[0]))
            return [T.alloc(constant_zero, x.shape[0], y.shape[1])]
        elif x.ndim == 1 and y.ndim == 2:
            constant_zero = assert_(constant_zero, T.eq(x.shape[0], y.shape[0]))
            return [T.alloc(constant_zero, y.shape[1])]
        elif x.ndim == 2 and y.ndim == 1:
            constant_zero = assert_(constant_zero, T.eq(x.shape[1], y.shape[0]))
            return [T.alloc(constant_zero, x.shape[0])]
        elif x.ndim == 1 and y.ndim == 1:
            constant_zero = assert_(constant_zero, T.eq(x.shape[0], y.shape[0]))
            return [constant_zero]
        else:
            _logger.warning(
                "Optimization Warning: "
                "Optimization theano/opt.py:local_0_dot_x Found "
                "that it could apply, but was not implemented "
                "for dot product with these input types:\n"
                "(%s, %s)",
                x.type,
                y.type,
            )
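# A NumPy sketch of why the rewrite is valid for the matrix-matrix case: a dot
# with a zero operand is just a zero-filled array of shape
# (x.shape[0], y.shape[1]), which is what the `alloc` above builds.
def _demo_zero_dot():
    import numpy as np

    x = np.zeros((3, 4))
    y = np.arange(20.0).reshape(4, 5)
    print(np.array_equal(np.dot(x, y), np.zeros((3, 5))))  # True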
def make_node(self, rng, size, dtype, *dist_params):
    """Create a random variable node.

    XXX: Unnamed/non-keyword arguments are considered distribution
    parameters!  If you want to set `size`, `rng`, and/or `name`, use their
    keywords.

    Parameters
    ----------
    rng: RandomStateType
        Existing Theano `RandomState` object to be used.  Creates a new one,
        if `None`.
    size: int or Sequence
        Numpy-like size of the output (i.e. replications).
    dtype: Theano dtype
        The dtype of the sampled output.  This value is only used when
        `self.dtype` isn't set.
    dist_params: list
        Distribution parameters.

    Returns
    -------
    out: `Apply`
        A node with inputs `(rng, size, dtype) + dist_args` and outputs
        `(rng_var, out_var)`.

    """
    if size is None:
        size = constant([], dtype="int64")
    elif isinstance(size, int):
        size = as_tensor_variable([size], ndim=1)
    elif not isinstance(size, (np.ndarray, Variable, Sequence)):
        raise TypeError(
            "Parameter size must be None, an integer, or a sequence with integers."
        )
    else:
        size = cast(as_tensor_variable(size, ndim=1), "int64")

    assert size.dtype in int_dtypes

    dist_params = tuple(
        as_tensor_variable(p) if not isinstance(p, Variable) else p
        for p in dist_params
    )

    if rng is None:
        rng = theano.shared(np.random.RandomState())
    elif not isinstance(rng.type, RandomStateType):
        raise TypeError("The type of rng should be an instance of RandomStateType")

    bcast = self.compute_bcast(dist_params, size)
    dtype = self.dtype or dtype

    if dtype is None or (isinstance(dtype, str) and dtype not in all_dtypes):
        # dtype = tt.scal.upcast(self.dtype, *[p.dtype for p in dist_params])
        raise TypeError("dtype is unspecified")

    if isinstance(dtype, str):
        dtype_idx = constant(all_dtypes.index(dtype), dtype="int64")
    else:
        dtype_idx = constant(dtype, dtype="int64")
        dtype = all_dtypes[dtype_idx.data]

    outtype = TensorType(dtype=dtype, broadcastable=bcast)
    out_var = outtype()
    inputs = (rng, size, dtype_idx) + dist_params
    outputs = (rng.type(), out_var)

    return Apply(self, inputs, outputs)
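# Illustrative, pure-Python restatement of the `size` normalization rules used
# above (None -> empty vector, int -> length-1 vector, sequences pass through);
# `normalize_size` is a hypothetical helper, not part of the library API.
def normalize_size(size):
    if size is None:
        return []
    if isinstance(size, int):
        return [size]
    if not hasattr(size, "__len__"):
        raise TypeError(
            "Parameter size must be None, an integer, or a sequence with integers."
        )
    return [int(s) for s in size]

# normalize_size(None) -> [], normalize_size(3) -> [3], normalize_size((2, 4)) -> [2, 4]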
def _infer_shape(self, size, dist_params, param_shapes=None):
    """Compute the output shape given the size and distribution parameters.

    Parameters
    ----------
    size : TensorVariable
        The size parameter specified for this `RandomVariable`.
    dist_params : list of TensorVariable
        The symbolic parameters for this `RandomVariable`'s distribution.
    param_shapes : list of tuples of TensorVariable (optional)
        The shapes of the `dist_params` as given by `ShapeFeature` via
        `Op.infer_shape`'s `input_shapes` argument.  These are essentially
        more accurate versions of ``[d.shape for d in dist_params]``.

    Returns
    -------
    shape : tuple of `ScalarVariable`

    """
    size_len = get_vector_length(size)

    if self.ndim_supp == 0 and size_len > 0:
        # In this case, we have a univariate distribution with a non-empty
        # `size` parameter, which means that the `size` parameter
        # completely determines the shape of the random variable.  More
        # importantly, the `size` parameter may be the only correct source
        # of information for the output shape, in that we would be misled
        # by the `dist_params` if we tried to infer the relevant parts of
        # the output shape from those.
        return size

    # Broadcast the parameters
    param_shapes = params_broadcast_shapes(
        param_shapes or [p.shape for p in dist_params], self.ndims_params
    )

    def slice_ind_dims(p, ps, n):
        shape = tuple(ps)

        if n == 0:
            return (p, shape)

        ind_slice = (slice(None),) * (p.ndim - n) + (0,) * n
        ind_shape = [
            s if b is False else constant(1, "int64")
            for s, b in zip(shape[:-n], p.broadcastable[:-n])
        ]
        return (
            p[ind_slice],
            ind_shape,
        )

    # These are versions of our actual parameters with the anticipated
    # dimensions (i.e. support dimensions) removed so that only the
    # independent variate dimensions are left.
    params_ind_slice = tuple(
        slice_ind_dims(p, ps, n)
        for p, ps, n in zip(dist_params, param_shapes, self.ndims_params)
    )

    if len(params_ind_slice) == 1:
        ind_param, ind_shape = params_ind_slice[0]
        ndim_ind = len(ind_shape)
        shape_ind = ind_shape
    elif len(params_ind_slice) > 1:
        # If there are multiple parameters, the dimensions of their
        # independent variates should broadcast together.
        p_slices, p_shapes = zip(*params_ind_slice)
        shape_ind = theano.tensor.extra_ops.broadcast_shape_iter(
            p_shapes, arrays_are_shapes=True
        )
        ndim_ind = len(shape_ind)
    else:
        # No parameters, so there are no independent variate dimensions.
        ndim_ind = 0
        shape_ind = ()

    if self.ndim_supp == 0:
        shape_supp = tuple()
        shape_reps = tuple(size)

        if ndim_ind > 0:
            shape_reps = shape_reps[:-ndim_ind]

        ndim_reps = len(shape_reps)
    else:
        shape_supp = self._shape_from_params(
            dist_params,
            param_shapes=param_shapes,
        )

        ndim_reps = size_len
        shape_reps = size

    ndim_shape = self.ndim_supp + ndim_ind + ndim_reps

    if ndim_shape == 0:
        shape = constant([], dtype="int64")
    else:
        shape = tuple(shape_reps) + tuple(shape_ind) + tuple(shape_supp)

    # if shape is None:
    #     raise ShapeError()

    return shape
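# A NumPy sketch of the `replication + independent + support` composition for
# the univariate case (ndim_supp == 0): when `size` is given it already carries
# the independent dims, matching the early `return size` above.  The names here
# are illustrative, not the library API.
def _demo_univariate_shape():
    import numpy as np

    rng = np.random.default_rng(0)
    loc = np.zeros(4)                         # independent (broadcast) parameter dims
    draw = rng.normal(loc, 1.0, size=(2, 4))  # `size` supplies the full output shape
    print(draw.shape)                         # (2, 4)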
def local_dimshuffle_subtensor(node):
    """If a subtensor is inside a dimshuffle which only drops broadcastable
    dimensions, scrap the dimshuffle and index the subtensor with 0.

    x[i:j, :, k:l].dimshuffle(0, 2) =>
        x[i:j, 0, k:l] if x.broadcastable == (False, True, False)

    """
    if isinstance(node.op, DimShuffle) and node.inputs[0].owner:
        # the dimshuffle can only drop dimensions (cannot reshape nor add 'x')
        if 'x' in node.op.new_order:
            return False
        new_order = node.op.new_order
        # new order could be empty
        if len(new_order) > 1:
            past_dim = new_order[0]
            for dim in new_order[1:]:
                if not dim > past_dim:
                    return False
                else:
                    past_dim = dim

        input_ = node.inputs[0]
        if isinstance(input_.owner.op, Subtensor):
            # the arguments missing from the dimshuffle must be dims
            # that are broadcastable
            broadcastable = input_.broadcastable

            missing_dims = list(range(input_.ndim))
            for dim in new_order:
                missing_dims.remove(dim)

            if not all([broadcastable[i] for i in missing_dims]):
                return False

            # create a new idx_list for a new Subtensor object
            # have to loop on idx_list and inputs
            # inputs has the length of sum of non None elements of idx_list
            # (check in slice!).
            # len(missing_dims) can be < len(idx_list), this happens if
            # tensor was indexed such as x[scalar, :, :], check that as well
            new_idx_list = list(input_.owner.op.idx_list)
            new_inputs = [input_.owner.inputs[0]]
            zero = T.constant(0)
            slice_attr_list = ['start', 'stop', 'step']
            j = 0
            slice_i = -1
            subtensor_removed_dims = 0
            for i, idx in enumerate(input_.owner.op.idx_list):
                if isinstance(idx, slice):
                    past_j = j
                    slice_i += 1
                    for slice_attr in slice_attr_list:
                        if getattr(idx, slice_attr) is not None:
                            new_inputs += [input_.owner.inputs[1 + j]]
                            j += 1
                    # if past_j == j indicates a slice(None, None, None),
                    # that's where we want to index with 0 if it is also at
                    # the same spot of a missing dim
                    if past_j == j and slice_i in missing_dims:
                        new_idx_list[i] = zero
                        new_inputs += [zero]
                else:
                    new_inputs += [input_.owner.inputs[1 + j]]
                    j += 1
                    subtensor_removed_dims += 1

            # Verify the trailing dimensions the subtensor didn't look at.
            for idx in range(len(input_.owner.op.idx_list), new_inputs[0].ndim):
                if (idx - subtensor_removed_dims) in missing_dims:
                    while len(new_idx_list) < idx:
                        new_idx_list.append(slice(None))
                    new_idx_list.append(zero)
                    new_inputs.append(zero)

            return [Subtensor(new_idx_list)(*new_inputs)]

    return False
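# A NumPy sketch of the docstring's example: when the dropped axis is
# broadcastable (length 1), squeezing it and indexing it with 0 are equivalent,
# which is why the dimshuffle can be folded into the subtensor.
def _demo_dimshuffle_subtensor():
    import numpy as np

    x = np.arange(2 * 1 * 4).reshape(2, 1, 4)  # middle axis is broadcastable
    a = x[0:2, :, 1:3].squeeze(1)              # analogue of .dimshuffle(0, 2)
    b = x[0:2, 0, 1:3]                         # what the rewrite produces
    print(np.array_equal(a, b))                # True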