def __init__(
    self,
    alpha: Tensor,
    beta: Tensor,
    zero_probability: Tensor,
    one_probability: Tensor,
) -> None:
    F = getF(alpha)
    self.alpha = alpha
    self.beta = beta
    self.zero_probability = zero_probability
    self.one_probability = one_probability
    self.beta_probability = 1 - zero_probability - one_probability
    self.beta_distribution = Beta(alpha=alpha, beta=beta)
    mixture_probs = F.stack(
        zero_probability, one_probability, self.beta_probability, axis=-1
    )
    super().__init__(
        components=[
            Deterministic(alpha.zeros_like()),
            Deterministic(alpha.ones_like()),
            self.beta_distribution,
        ],
        mixture_probs=mixture_probs,
    )
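# A standalone sketch (MXNet NDArray API assumed, illustrative values) of
# the mixture wiring above: the three stacked weights select the point mass
# at 0, the point mass at 1, or the Beta component, and must sum to one
# entrywise.
from mxnet import nd

zero_p = nd.array([0.2, 0.0])
one_p = nd.array([0.1, 0.3])
beta_p = 1 - zero_p - one_p  # weight of the Beta component
mixture_probs = nd.stack(zero_p, one_p, beta_p, axis=-1)
print(mixture_probs.asnumpy())                 # shape (2, 3)
print(mixture_probs.sum(axis=-1).asnumpy())    # rows sum to 1.0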
def __init__(
    self, alpha: Tensor, beta: Tensor, one_probability: Tensor
) -> None:
    super().__init__(
        alpha=alpha,
        beta=beta,
        zero_probability=alpha.zeros_like(),
        one_probability=one_probability,
    )
def quantile(self, level: Tensor) -> Tensor:
    F = self.F
    for _ in range(self.all_dim):
        level = level.expand_dims(axis=-1)

    # Laplace quantile: mu + b * log(2 * level) for level <= 0.5,
    # and mu - b * log(2 - 2 * level) for level > 0.5
    condition = F.broadcast_greater(level, level.zeros_like() + 0.5)
    u = F.where(condition, -F.log(2.0 - 2.0 * level), F.log(2.0 * level))

    return F.broadcast_add(self.mu, F.broadcast_mul(self.b, u))
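# Sanity check for the quantile formula above (a standalone sketch, not
# part of the class; mu, b, and the levels are illustrative): evaluating
# the Laplace CDF back at the computed quantiles should reproduce the
# requested levels.
from mxnet import nd

mu, b = 0.0, 1.0
level = nd.array([0.1, 0.5, 0.9])
condition = level > 0.5
u = nd.where(condition, -nd.log(2.0 - 2.0 * level), nd.log(2.0 * level))
q = mu + b * u
# Laplace CDF: 0.5*exp((x-mu)/b) for x < mu, else 1 - 0.5*exp(-(x-mu)/b)
cdf = nd.where(
    q < mu, 0.5 * nd.exp((q - mu) / b), 1 - 0.5 * nd.exp(-(q - mu) / b)
)
print(cdf.asnumpy())  # ~[0.1, 0.5, 0.9]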
def compute_scale(
    self, F, data: Tensor, observed_indicator: Tensor
) -> Tensor:
    """
    Parameters
    ----------
    F
        A module that can either refer to the Symbol API or the
        NDArray API in MXNet.
    data
        Tensor containing the data to be scaled.
    observed_indicator
        Binary tensor with the same shape as ``data``, with 1 at
        observed data points and 0 at missing data points.

    Returns
    -------
    Tensor
        Tensor of shape (N, T, C) or (N, C, T), min-max scaled along
        the specified axis.
    """
    # Along the specified axis, flag the series that are identically zero.
    axis_zero = F.prod(
        data == data.zeros_like(), axis=self.axis, keepdims=True
    )
    # Broadcast the flag to the shape of the data.
    axis_zero = F.broadcast_like(axis_zero, data)

    # Min along the specified axis, ignoring missing values by replacing
    # them with the global max so they never win the minimum.
    min_val = F.where(
        1 - observed_indicator,
        F.broadcast_like(data.max(keepdims=True), data),
        data,
    ).min(axis=self.axis, keepdims=True)

    # Max along the specified axis, ignoring missing values by replacing
    # them with the global min so they never win the maximum.
    max_val = F.where(
        1 - observed_indicator,
        F.broadcast_like(data.min(keepdims=True), data),
        data,
    ).max(axis=self.axis, keepdims=True)

    # Rescale to [0, 1].
    scaled_data = (data - min_val) / (max_val - min_val)

    # Series that are identically zero produce 0/0 = NaN above; map them to zero.
    scaled_data = F.where(axis_zero, scaled_data.zeros_like(), scaled_data)

    # Remaining NaNs come from constant non-zero series (max == min);
    # scaled_data != scaled_data is true exactly at the NaN entries. Map them to one.
    scaled_data = F.where(
        scaled_data != scaled_data, scaled_data.ones_like(), scaled_data
    )

    # Zero out entries that observed_indicator marks as missing.
    return F.where(
        1 - observed_indicator, scaled_data.zeros_like(), scaled_data
    )
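# A toy run of the logic above (standalone sketch; assumes scaling along
# axis=1, i.e. along time, with illustrative values): the unobserved 3.0
# is excluded from the min/max and zeroed in the output, and the all-zero
# series stays all-zero instead of producing NaNs.
from mxnet import nd

data = nd.array([[1.0, 5.0, 3.0], [0.0, 0.0, 0.0]])
observed = nd.array([[1.0, 1.0, 0.0], [1.0, 1.0, 1.0]])
axis = 1

axis_zero = nd.broadcast_like(
    nd.prod(data == 0, axis=axis, keepdims=True), data
)
min_val = nd.where(
    1 - observed, nd.broadcast_like(data.max(keepdims=True), data), data
).min(axis=axis, keepdims=True)
max_val = nd.where(
    1 - observed, nd.broadcast_like(data.min(keepdims=True), data), data
).max(axis=axis, keepdims=True)
scaled = (data - min_val) / (max_val - min_val)
scaled = nd.where(axis_zero, scaled.zeros_like(), scaled)
scaled = nd.where(scaled != scaled, scaled.ones_like(), scaled)
print(nd.where(1 - observed, scaled.zeros_like(), scaled).asnumpy())
# [[0. 1. 0.]
#  [0. 0. 0.]]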
def quantile(self, level: Tensor) -> Tensor:
    F = self.F

    # self.bin_probs.shape = (batch_shape, num_bins)
    probs = self.bin_probs.transpose()  # (num_bins, batch_shape.T)

    # (batch_shape)
    zeros_batch_size = F.zeros_like(
        F.slice_axis(self.bin_probs, axis=-1, begin=0, end=1).squeeze(
            axis=-1
        )
    )

    level = level.expand_dims(axis=0)
    # cdf shape (batch_size.T, levels)
    zeros_cdf = F.broadcast_add(
        zeros_batch_size.transpose().expand_dims(axis=-1),
        level.zeros_like(),
    )
    start_state = (zeros_cdf, zeros_cdf.astype("int32"))

    def step(p, state):
        cdf, idx = state
        cdf = F.broadcast_add(cdf, p.expand_dims(axis=-1))
        idx = F.where(F.broadcast_greater(cdf, level), idx, idx + 1)
        return zeros_batch_size, (cdf, idx)

    _, states = F.contrib.foreach(step, probs, start_state)
    _, idx = states

    # idx.shape = (batch.T, levels)
    # centers.shape = (batch, num_bins)
    #
    # expand centers to shape -> (levels, batch, num_bins)
    # so we can use pick with idx.T.shape = (levels, batch)
    #
    # zeros_cdf.shape (batch.T, levels)
    centers_expanded = F.broadcast_add(
        self.bin_centers.transpose().expand_dims(axis=-1),
        zeros_cdf.expand_dims(axis=0),
    ).transpose()
    # centers_expanded.shape = (levels, batch, num_bins)

    # idx.shape (batch.T, levels)
    a = centers_expanded.pick(idx.transpose(), axis=-1)
    return a
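# The foreach scan above amounts to a cumulative sum over bin
# probabilities followed by locating the first bin whose CDF exceeds the
# level. A plain-numpy sketch of that equivalence for a single series
# (illustrative bins; side="right" mirrors the strict cdf > level test):
import numpy as np

bin_probs = np.array([0.1, 0.2, 0.4, 0.3])
bin_centers = np.array([-1.0, 0.0, 1.0, 2.0])
levels = np.array([0.05, 0.5, 0.95])

cdf = np.cumsum(bin_probs)                        # [0.1, 0.3, 0.7, 1.0]
idx = np.searchsorted(cdf, levels, side="right")  # first bin with cdf > level
print(bin_centers[idx])                           # [-1.  1.  2.]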
def log_prob(self, x: Tensor) -> Tensor:
    F = self.F

    # mask zeros and ones in the Beta distribution input
    # to prevent NaN gradients
    inputs = F.where(F.logical_or(x == 0, x == 1), x.zeros_like() + 0.5, x)

    # compute the log density, case by case
    return F.where(
        x == 1,
        F.log(self.one_probability.broadcast_like(x)),
        F.where(
            x == 0,
            F.log(self.zero_probability.broadcast_like(x)),
            F.log(self.beta_probability)
            + self.beta_distribution.log_prob(inputs),
        ),
    )
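# A standalone numeric sketch of the piecewise density above (illustrative
# parameters; math.lgamma supplies the Beta normalizer): mass p0 at 0,
# p1 at 1, and weight (1 - p0 - p1) on the Beta density in between. The
# 0.5 fed into the masked entries is discarded by the outer where.
import math
from mxnet import nd

a, b, p0, p1 = 2.0, 3.0, 0.2, 0.1
x = nd.array([0.0, 0.5, 1.0])

inputs = nd.where((x == 0) + (x == 1), x.zeros_like() + 0.5, x)
ln_B = math.lgamma(a) + math.lgamma(b) - math.lgamma(a + b)
log_beta = (a - 1) * nd.log(inputs) + (b - 1) * nd.log(1 - inputs) - ln_B
log_p = nd.where(
    x == 1,
    math.log(p1) * x.ones_like(),
    nd.where(
        x == 0,
        math.log(p0) * x.ones_like(),
        math.log(1 - p0 - p1) + log_beta,
    ),
)
print(log_p.asnumpy())  # [log 0.2, log(0.7 * Beta(2,3) pdf at 0.5), log 0.1]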
def quantile(self, level: Tensor) -> Tensor:
    F = self.F
    # we consider level to be an independent axis and so expand it
    # to shape (num_levels, 1, 1, ...)
    for _ in range(self.all_dim):
        level = level.expand_dims(axis=-1)

    quantiles = F.broadcast_mul(self.value, level.ones_like())
    level = F.broadcast_mul(quantiles.ones_like(), level)

    # return -inf at level 0: no mass lies below self.value
    minus_inf = -quantiles.ones_like() / 0.0
    quantiles = F.where(
        F.broadcast_logical_or(level != 0, F.contrib.isnan(quantiles)),
        quantiles,
        minus_inf,
    )

    # propagate NaN levels to NaN quantiles
    # (level != level is true exactly at the NaN entries)
    nans = level.zeros_like() / 0.0
    quantiles = F.where(level != level, nans, quantiles)

    return quantiles
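# What the branches above yield for a deterministic value v (standalone
# sketch with illustrative inputs): the quantile is v for any level in
# (0, 1], -inf at level 0, and NaN levels come out as NaN.
from mxnet import nd

value = nd.array([3.0])
level = nd.array([0.0, 0.5, float("nan")])

quantiles = nd.broadcast_mul(value, level.ones_like())
quantiles = nd.where(level != 0, quantiles, -quantiles.ones_like() / 0.0)
quantiles = nd.where(level != level, level.zeros_like() / 0.0, quantiles)
print(quantiles.asnumpy())  # [-inf, 3.0, nan]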
def s(mu: Tensor, sigma: Tensor) -> Tensor:
    raw_samples = self.F.sample_normal(
        mu=mu.zeros_like(), sigma=sigma.ones_like(), dtype=dtype
    )
    return sigma * raw_samples + mu
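# Usage sketch of the reparameterization above (standalone, illustrative
# values): sampling a standard normal and applying the affine transform
# keeps mu and sigma on the differentiable path, so gradients flow through
# the transform rather than through the sampler.
from mxnet import nd, autograd

mu = nd.array([1.0, 2.0])
sigma = nd.array([0.5, 1.5])
mu.attach_grad()
sigma.attach_grad()

raw = nd.sample_normal(mu=nd.zeros_like(mu), sigma=nd.ones_like(sigma))
with autograd.record():
    samples = sigma * raw + mu
samples.backward()
print(mu.grad.asnumpy())     # [1. 1.]   (d samples / d mu)
print(sigma.grad.asnumpy())  # equals raw (d samples / d sigma)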
def nans_like(x: Tensor) -> Tensor:
    # 0/0 evaluates to NaN elementwise, yielding a NaN tensor shaped like x
    return x.zeros_like() / 0.0
def s(low: Tensor, high: Tensor) -> Tensor:
    raw_samples = self.F.sample_uniform(
        low=low.zeros_like(), high=high.ones_like(), dtype=dtype
    )
    return low + raw_samples * (high - low)
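# The same affine-sampling pattern for the uniform case (standalone
# sketch, illustrative bounds): draw U(0, 1) and rescale, so each entry
# lands in [low_i, high_i).
from mxnet import nd

low = nd.array([0.0, 10.0])
high = nd.array([1.0, 20.0])
raw = nd.sample_uniform(low=low.zeros_like(), high=high.ones_like())
samples = low + raw * (high - low)
print(samples.asnumpy())  # each entry lies within its [low_i, high_i)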