def erf(F, x: Tensor):
    if MXNET_HAS_ERF:
        return F.erf(x)
    # Using the Numerical Recipes approximation for the erf function,
    # accurate to 1e-7
    ones = x.ones_like()
    zeros = x.zeros_like()
    t = ones / (ones + 0.5 * x.abs())
    coefficients = [
        1.00002368,
        0.37409196,
        0.09678418,
        -0.18628806,
        0.27886807,
        -1.13520398,
        1.48851587,
        -0.82215223,
        0.17087277,
    ]
    # Horner evaluation of the polynomial in t, innermost coefficient first
    inner = zeros
    for c in coefficients[::-1]:
        inner = t * (c + inner)
    res = ones - t * (inner - 1.26551223 - x.square()).exp()
    return F.where(F.broadcast_greater_equal(x, zeros), res, -1.0 * res)

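# A minimal sanity check for the polynomial branch above (a sketch, assuming
# `erf` is in scope with MXNET_HAS_ERF = False so the approximation is
# exercised); math.erf is the reference implementation.
import math
import mxnet as mx

values = [-2.0, -0.5, 0.0, 0.5, 2.0]
approx = erf(mx.nd, mx.nd.array(values)).asnumpy()
# the Numerical Recipes polynomial is accurate to about 1e-7
assert all(abs(a - math.erf(v)) < 1e-6 for a, v in zip(approx, values))
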
def quantile(self, level: Tensor) -> Tensor:
    F = self.F

    probs = self.bin_probs.swapaxes(0, 1)  # (num_bins, batch)
    # all-zeros tensor of shape (batch_size,); the zeros_like wrapper is
    # required, otherwise the scan below would start from the first bin's
    # probabilities instead of zero
    zeros_batch_size = F.zeros_like(
        F.slice_axis(probs, axis=0, begin=0, end=1).squeeze(axis=0)
    )

    level = level.expand_dims(axis=0)
    # cdf shape (batch_size, levels)
    zeros_cdf = F.broadcast_add(
        zeros_batch_size.expand_dims(axis=1), level.zeros_like()
    )
    start_state = (zeros_cdf, zeros_cdf.astype("int32"))

    def step(p, state):
        cdf, idx = state
        cdf = F.broadcast_add(cdf, p.expand_dims(axis=1))
        idx = F.where(F.broadcast_greater(cdf, level), idx, idx + 1)
        return zeros_batch_size, (cdf, idx)

    _, states = F.contrib.foreach(step, probs, start_state)
    _, idx = states

    # expand centers to shape (batch, levels, num_bins)
    # so we can use pick with idx.shape = (batch, levels)
    centers_expanded = F.broadcast_add(
        self.bin_centers.expand_dims(axis=1),
        zeros_cdf.expand_dims(axis=-1),
    )
    a = centers_expanded.pick(idx, axis=-1)
    return a.swapaxes(0, 1)

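# For intuition, a NumPy equivalent of the scan above (a hypothetical helper,
# not part of the class): accumulate the CDF over bins and count, per level,
# how many bins have cumulative probability <= level; that count indexes the
# bin center to return.
import numpy as np

def binned_quantile_np(bin_probs, bin_centers, levels):
    cdf = np.cumsum(bin_probs, axis=-1)  # (batch, num_bins)
    idx = (cdf[:, None, :] <= levels[None, :, None]).sum(axis=-1)
    idx = np.clip(idx, 0, bin_centers.shape[-1] - 1)  # pick clips likewise
    return np.take_along_axis(bin_centers, idx, axis=-1)  # (batch, levels)

# e.g. probs [0.2, 0.3, 0.5] over centers [1, 2, 3]:
# level 0.1 -> 1.0, level 0.6 -> 3.0
print(binned_quantile_np(np.array([[0.2, 0.3, 0.5]]),
                         np.array([[1.0, 2.0, 3.0]]),
                         np.array([0.1, 0.6])))
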
def __init__(
    self,
    alpha: Tensor,
    beta: Tensor,
    zero_probability: Tensor,
    one_probability: Tensor,
) -> None:
    F = getF(alpha)
    self.alpha = alpha
    self.beta = beta
    self.zero_probability = zero_probability
    self.one_probability = one_probability
    self.beta_probability = 1 - zero_probability - one_probability
    self.beta_distribution = Beta(alpha=alpha, beta=beta)
    mixture_probs = F.stack(
        zero_probability, one_probability, self.beta_probability, axis=-1
    )
    super().__init__(
        components=[
            Deterministic(alpha.zeros_like()),
            Deterministic(alpha.ones_like()),
            self.beta_distribution,
        ],
        mixture_probs=mixture_probs,
    )

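# A hedged construction sketch, assuming this __init__ belongs to a
# mixture-distribution subclass along the lines of GluonTS'
# ZeroAndOneInflatedBeta (the class name here is an assumption). Each item
# mixes a point mass at 0, a point mass at 1, and a Beta(alpha, beta);
# the three mixture weights sum to one by construction.
import mxnet as mx

dist = ZeroAndOneInflatedBeta(
    alpha=mx.nd.array([2.0, 0.5]),
    beta=mx.nd.array([5.0, 0.5]),
    zero_probability=mx.nd.array([0.1, 0.0]),
    one_probability=mx.nd.array([0.2, 0.3]),
)
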
def __init__(
    self, alpha: Tensor, beta: Tensor, one_probability: Tensor
) -> None:
    super().__init__(
        alpha=alpha,
        beta=beta,
        zero_probability=alpha.zeros_like(),
        one_probability=one_probability,
    )

def quantile(self, level: Tensor) -> Tensor:
    F = self.F
    for _ in range(self.all_dim):
        level = level.expand_dims(axis=-1)

    condition = F.broadcast_greater(level, level.zeros_like() + 0.5)
    # Laplace inverse CDF: u = -log(2 - 2p) for p > 0.5, log(2p) otherwise
    u = F.where(condition, -F.log(2.0 - 2.0 * level), F.log(2.0 * level))

    return F.broadcast_add(self.mu, F.broadcast_mul(self.b, u))

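# A scalar check of the closed form above (`laplace_quantile_scalar` is a
# hypothetical helper): with mu = 0 and b = 1, the quartiles are
# Q(0.25) = -log 2 and Q(0.75) = +log 2, symmetric around the median.
import math

def laplace_quantile_scalar(p, mu=0.0, b=1.0):
    u = -math.log(2.0 - 2.0 * p) if p > 0.5 else math.log(2.0 * p)
    return mu + b * u

assert abs(laplace_quantile_scalar(0.25) + math.log(2.0)) < 1e-12
assert abs(laplace_quantile_scalar(0.75) - math.log(2.0)) < 1e-12
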
def compute_scale(
    self, F, data: Tensor, observed_indicator: Tensor
) -> Tensor:
    """
    Parameters
    ----------
    F
        A module that can either refer to the Symbol API or the NDArray
        API in MXNet.
    data
        Tensor containing the data to be scaled.
    observed_indicator
        Binary tensor with the same shape as ``data``, that has 1 in
        correspondence of observed data points, and 0 in correspondence
        of missing data points.

    Returns
    -------
    Tensor
        Tensor of shape (N, T, C) or (N, C, T), scaled along the
        specified axis.
    """
    # Along the specified axis, flag the slices that are identically zero
    axis_zero = nd.prod(data == data.zeros_like(), self.axis, keepdims=True)
    # Broadcast the flags to the shape of data
    axis_zero = nd.broadcast_to(axis_zero, shape=data.shape)

    # Min along the specified axis, ignoring missing points by first
    # replacing them with the global max
    min_val = nd.where(
        1 - observed_indicator,
        nd.broadcast_to(data.max(keepdims=True), shape=data.shape),
        data,
    ).min(axis=self.axis, keepdims=True)

    # Max along the specified axis, ignoring missing points by first
    # replacing them with the global min
    max_val = nd.where(
        1 - observed_indicator,
        nd.broadcast_to(data.min(keepdims=True), shape=data.shape),
        data,
    ).max(axis=self.axis, keepdims=True)

    # Rescale to [0, 1]
    scaled_data = (data - min_val) / (max_val - min_val)

    # Set NaNs to zero where the data was identically zero along the axis
    scaled_data = nd.where(axis_zero, scaled_data.zeros_like(), scaled_data)

    # Set the remaining NaNs (constant, nonzero slices) to one;
    # scaled_data != scaled_data is true exactly where scaled_data is NaN
    scaled_data = nd.where(
        scaled_data != scaled_data, scaled_data.ones_like(), scaled_data
    )

    # Zero out the points that observed_indicator marks as missing
    return nd.where(
        1 - observed_indicator, scaled_data.zeros_like(), scaled_data
    )

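# A hypothetical usage sketch for the scaler above (assuming an instance
# `scaler` with self.axis = 1, i.e. scaling along time, and mxnet.nd in
# scope): the missing point is excluded from the min/max and zeroed in
# the output.
from mxnet import nd

data = nd.array([[1.0, 2.0, 3.0, 4.0]])       # shape (N=1, T=4)
observed = nd.array([[1.0, 1.0, 0.0, 1.0]])   # third point is missing
# scaled = scaler.compute_scale(nd, data, observed)
# min/max come from {1, 2, 4}, so the observed points map to [0, 1/3, 1]
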
def quantile(self, level: Tensor) -> Tensor:
    F = self.F

    # self.bin_probs.shape = (batch_shape, num_bins)
    probs = self.bin_probs.transpose()  # (num_bins, batch_shape.T)

    # (batch_shape)
    zeros_batch_size = F.zeros_like(
        F.slice_axis(self.bin_probs, axis=-1, begin=0, end=1).squeeze(axis=-1)
    )

    level = level.expand_dims(axis=0)
    # cdf shape (batch_size.T, levels)
    zeros_cdf = F.broadcast_add(
        zeros_batch_size.transpose().expand_dims(axis=-1),
        level.zeros_like(),
    )
    start_state = (zeros_cdf, zeros_cdf.astype("int32"))

    def step(p, state):
        cdf, idx = state
        cdf = F.broadcast_add(cdf, p.expand_dims(axis=-1))
        idx = F.where(F.broadcast_greater(cdf, level), idx, idx + 1)
        return zeros_batch_size, (cdf, idx)

    _, states = F.contrib.foreach(step, probs, start_state)
    _, idx = states

    # idx.shape = (batch.T, levels)
    # centers.shape = (batch, num_bins)
    #
    # expand centers to shape -> (levels, batch, num_bins)
    # so we can use pick with idx.T.shape = (levels, batch)
    #
    # zeros_cdf.shape (batch.T, levels)
    centers_expanded = F.broadcast_add(
        self.bin_centers.transpose().expand_dims(axis=-1),
        zeros_cdf.expand_dims(axis=0),
    ).transpose()
    # centers_expanded.shape = (levels, batch, num_bins)

    # idx.shape (batch.T, levels)
    a = centers_expanded.pick(idx.transpose(), axis=-1)
    return a

def erfinv(F, x: Tensor) -> Tensor:
    if MXNET_HAS_ERFINV:
        return F.erfinv(x)
    zeros = x.zeros_like()

    w = -F.log(F.broadcast_mul((1.0 - x), (1.0 + x)))
    # two polynomial regimes, split at w = 5
    mask_lesser = F.broadcast_lesser(w, zeros + 5.0)

    w = F.where(mask_lesser, w - 2.5, F.sqrt(w) - 3.0)

    coefficients_lesser = [
        2.81022636e-08,
        3.43273939e-07,
        -3.5233877e-06,
        -4.39150654e-06,
        0.00021858087,
        -0.00125372503,
        -0.00417768164,
        0.246640727,
        1.50140941,
    ]

    coefficients_greater_equal = [
        -0.000200214257,
        0.000100950558,
        0.00134934322,
        -0.00367342844,
        0.00573950773,
        -0.0076224613,
        0.00943887047,
        1.00167406,
        2.83297682,
    ]

    p = F.where(
        mask_lesser,
        coefficients_lesser[0] + zeros,
        coefficients_greater_equal[0] + zeros,
    )

    # Horner evaluation, selecting the coefficient per element by regime
    for c_l, c_ge in zip(
        coefficients_lesser[1:], coefficients_greater_equal[1:]
    ):
        c = F.where(mask_lesser, c_l + zeros, c_ge + zeros)
        p = c + F.broadcast_mul(p, w)

    return F.broadcast_mul(p, x)

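# A hedged round-trip check (a sketch, assuming `erf` and `erfinv` above are
# in scope and the polynomial branches are exercised): erf(erfinv(x)) should
# reproduce x to within the ~1e-6 accuracy of the approximations.
import mxnet as mx

values = [-0.9, -0.3, 0.0, 0.3, 0.9]
roundtrip = erf(mx.nd, erfinv(mx.nd, mx.nd.array(values))).asnumpy()
assert all(abs(r - v) < 1e-4 for r, v in zip(roundtrip, values))
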
def log_prob(self, x: Tensor) -> Tensor:
    F = self.F

    # mask the zeros and ones in x before feeding it to the Beta
    # distribution, to prevent NaN gradients at the boundary
    inputs = F.where(F.logical_or(x == 0, x == 1), x.zeros_like() + 0.5, x)

    # compute log density, case by case
    return F.where(
        x == 1,
        F.log(self.one_probability.broadcast_like(x)),
        F.where(
            x == 0,
            F.log(self.zero_probability.broadcast_like(x)),
            F.log(self.beta_probability)
            + self.beta_distribution.log_prob(inputs),
        ),
    )

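# The same case analysis in scalar form (a hypothetical helper, using
# scipy's Beta density as the continuous component):
import math
from scipy.stats import beta as beta_dist

def inflated_beta_logpdf(x, alpha, beta, p_zero, p_one):
    if x == 0.0:
        return math.log(p_zero)
    if x == 1.0:
        return math.log(p_one)
    return math.log(1.0 - p_zero - p_one) + beta_dist.logpdf(x, alpha, beta)
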
def quantile(self, level: Tensor) -> Tensor:
    F = getF(level)

    # compose the signs of all transforms to detect whether the overall
    # mapping is increasing or decreasing
    sign = 1.0
    for t in self.transforms:
        sign = sign * t.sign

    if not isinstance(sign, (mx.nd.NDArray, mx.sym.Symbol)):
        sign = sign + level.zeros_like()

    # mirror the level where the composed transform is decreasing
    cond = F.broadcast_greater(sign, sign.zeros_like())
    level = F.broadcast_mul(cond, level) + F.broadcast_mul(
        1.0 - cond, 1.0 - level
    )

    q = self.base_distribution.quantile(level)
    for t in self.transforms:
        q = t.f(q)
    return q

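# Why the level is mirrored where the composed sign is negative, in scalar
# form (an illustrative note, not library code): for a monotonically
# decreasing f, P(f(X) <= f(q)) = P(X >= q) = 1 - P(X < q), so the
# level-quantile of f(X) is f applied to the (1 - level)-quantile of X.
# For example, with f(x) = -x and base quantile function Q:
#   quantile_of_minus_X(level) = -Q(1 - level)
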
def quantile(self, level: Tensor) -> Tensor:
    F = self.F
    # we consider level to be an independent axis and so expand it
    # to shape (num_levels, 1, 1, ...)
    for _ in range(self.all_dim):
        level = level.expand_dims(axis=-1)

    quantiles = F.broadcast_mul(self.value, level.ones_like())
    level = F.broadcast_mul(quantiles.ones_like(), level)

    minus_inf = -quantiles.ones_like() / 0.0
    quantiles = F.where(
        F.broadcast_logical_or(level != 0, F.contrib.isnan(quantiles)),
        quantiles,
        minus_inf,
    )

    nans = level.zeros_like() / 0.0
    quantiles = F.where(level != level, nans, quantiles)

    return quantiles

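# Scalar summary of the edge cases handled above (a hypothetical helper):
# the quantile of a point mass is its value for level in (0, 1], -inf at
# level 0, and a NaN level propagates to a NaN quantile.
def deterministic_quantile_scalar(value, level):
    if level != level:  # NaN level
        return float("nan")
    if level == 0.0:
        return float("-inf")
    return value
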
def nans_like(x: Tensor) -> Tensor:
    # elementwise 0/0 yields NaN, giving a NaN tensor with x's shape
    return x.zeros_like() / 0.0

def s(mu: Tensor, sigma: Tensor) -> Tensor:
    raw_samples = self.F.sample_normal(
        mu=mu.zeros_like(), sigma=sigma.ones_like()
    )
    return sigma * raw_samples + mu

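# The affine form above is the reparameterization trick: drawing from a
# standard normal and shifting/scaling keeps the sample differentiable in
# mu and sigma. A minimal autograd sketch, assuming NDArray mode; the noise
# z is drawn outside the recorded graph, mirroring how the trick treats it
# as a constant.
import mxnet as mx
from mxnet import autograd

mu, sigma = mx.nd.array([0.0]), mx.nd.array([2.0])
mu.attach_grad()
sigma.attach_grad()
z = mx.nd.sample_normal(mu=mu.zeros_like(), sigma=sigma.ones_like())
with autograd.record():
    sample = sigma * z + mu
sample.backward()
# mu.grad == 1 and sigma.grad == z: gradients flow through both parameters
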
def s(low: Tensor, high: Tensor) -> Tensor:
    raw_samples = self.F.sample_uniform(
        low=low.zeros_like(), high=high.ones_like(), dtype=dtype
    )
    return low + raw_samples * (high - low)