def log_pdf(self, *X):
    """Log Probability Density Function

    Evaluates ``log(sqrt(2) / (scale * sqrt(pi))) - X**2 / (2 * scale**2)``
    for non-negative inputs. This is the logarithm of a density of the
    half-normal form parameterized by :code:`scale`.

    Parameters
    ----------
    X : numpy.ndarray, float
        Observations. They are passed through ``check_array`` first
        (presumably coerced to a single 1D array -- confirm against that
        helper).

    Returns
    -------
    numpy.ndarray
        The log density reported elementwise. Entries with ``X < 0`` are
        ``-inf`` because the density is zero outside the support.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    log_norm = np.log(np.sqrt(2)) - np.log(self.scale * np.sqrt(np.pi))

    # the exponent is governed by scale**2 (the same term the density
    # itself uses), not by the variance of the distribution
    log_p = log_norm - (X**2 / (2 * self.scale**2))

    # log(0) is -inf; returning any finite value here would claim a
    # positive density outside of the support
    return np.where(X >= 0, log_p, -np.inf)
def pdf(self, *X):
    """Probability Density Function

    Evaluates ``sqrt(2) / (scale * sqrt(pi)) * exp(-X**2 / (2 * scale**2))``
    for non-negative inputs and zero elsewhere.

    Parameters
    ----------
    X : numpy.ndarray, float
        Observations. They are passed through ``check_array`` first
        (presumably coerced to a single 1D array -- confirm against that
        helper).

    Returns
    -------
    numpy.ndarray
        The density reported elementwise; ``0`` where ``X < 0``.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    norm = np.sqrt(2) / (self.scale * np.sqrt(np.pi))
    p = norm * np.exp(-X**2 / (2 * self.scale**2))

    # the boundary X == 0 belongs to the support (the density there equals
    # `norm`), which also matches the `X >= 0` test used by `log_pdf`
    return np.where(X >= 0, p, 0)
def partial_fit(self, X):
    """Incrementally update the scale estimate from a batch of data.

    The running empirical variance is a sample-size weighted average of the
    NaN-ignoring variance of every batch seen so far. The scale is then
    set to ``sqrt(empirical_variance / (1 - 2 / pi))`` after passing through
    ``_handle_zeros_in_scale``.

    Parameters
    ----------
    X : numpy.ndarray, float
        Batch of observations; NaN entries are excluded from the sample
        count and from the variance.

    Returns
    -------
    self
        The updated estimator.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # a never-fitted estimator has not counted any samples yet
    if not hasattr(self, '_n_samples'):
        self._n_samples = 0

    is_first_batch = self._empirical_variance is None

    # statistics of the incoming batch (NaN values do not count)
    batch_size = X.shape[0] - np.isnan(X).sum()
    batch_variance = np.nanvar(X)

    seen = self._n_samples
    self._n_samples = seen + batch_size

    if is_first_batch:
        self._empirical_variance = batch_variance
    else:
        # weight the old and new variances by their sample counts
        weighted = ((self._empirical_variance * seen)
                    + (batch_variance * batch_size))
        self._empirical_variance = weighted / self._n_samples

    # convert the empirical variance into the scale parameter
    variance_ratio = (1 - (2 / np.pi))
    raw_scale = np.sqrt(self._empirical_variance / variance_ratio)
    self.scale = _handle_zeros_in_scale(raw_scale)
    return self
def quantile(self, *q):
    r"""Quantile Function

    Also known as the inverse cumulative distribution function, this
    function takes known quantiles and returns the associated `X` value
    from the support domain.

    .. math::

        \begin{cases}
            0 &\text{if } 0 \leq q \leq 1 - p\\
            1 &\text{if } 1 - p \lt q \leq 1
        \end{cases}

    where `p` is the :code:`bias` in favor of a positive event

    Parameters
    ----------
    q : numpy.ndarray, float
        The probabilities within domain [0, 1]

    Returns
    -------
    numpy.ndarray
        The `X` values from the support domain associated with the input
        quantiles.
    """
    # check array for numpy structure
    q = check_array(q, reduce_args=True, ensure_1d=True)

    # continuous inverse of the single-trial binomial CDF, rounded up to
    # the next integer outcome
    out = np.ceil(sc.bdtrik(q, 1, self.bias))

    # the CDF at X = 0 equals 1 - bias, so every q at or below that level
    # maps to 0 (comparing q against `bias` itself is only right when
    # bias == 0.5)
    return np.where(q <= 1 - self.bias, 0, out)
def log_pdf(self, *X):
    """Log Probability Density Function

    Evaluates ``-(X - center)**2 / (2 * variance) - log(scale * sqrt(2 * pi))``
    elementwise, with the last term computed as a sum of two logarithms.

    Parameters
    ----------
    X : numpy.ndarray, float
        Observations. They are passed through ``check_array`` first
        (presumably coerced to a single 1D array -- confirm against that
        helper).

    Returns
    -------
    numpy.ndarray
        The log density reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # squared distance from the center, scaled by twice the variance
    two_variance = 2 * self.variance
    squared_deviation = (X - self.center)**2

    # log of the normalizing constant scale * sqrt(2 * pi)
    log_normalizer = np.log(self.scale) + np.log(np.sqrt(2 * np.pi))

    return -squared_deviation / two_variance - log_normalizer
def partial_fit(self, X):
    """Incrementally update the bias estimate from a batch of data.

    Values flagged by ``self.support.not_contains`` are replaced with NaN
    and ignored. A running mean is kept as the sample-size weighted average
    of the batch means, and :code:`bias` is set to ``1 / mean``.

    Parameters
    ----------
    X : numpy.ndarray
        Batch of observations; converted to float so that invalid entries
        can be masked with NaN.

    Returns
    -------
    self
        The updated estimator.
    """
    # check_array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True).astype(float)

    # mask out anything that falls outside of the support
    outside = self.support.not_contains(X)
    X[outside] = np.nan

    # a never-fitted estimator has not counted any samples yet
    if not hasattr(self, "_n_samples"):
        self._n_samples = 0

    is_first_batch = self._mean is None

    # statistics of the incoming batch (NaN values do not count)
    batch_size = X.shape[0] - np.isnan(X).sum()
    batch_mean = np.nanmean(X)

    seen = self._n_samples
    self._n_samples = seen + batch_size

    if is_first_batch:
        self._mean = batch_mean
    else:
        # weight the old and new means by their sample counts
        weighted = (self._mean * seen) + (batch_mean * batch_size)
        self._mean = weighted / self._n_samples

    self.bias = 1 / self._mean
    return self
def log_pmf(self, *X):
    r"""Log Probability Mass Function

    The probability mass function for the Bernoulli distribution is given
    by two cases.

    .. math::

        \begin{cases}
            1-p &\text{if } X = 0\\
            p &\text{if } X = 1
        \end{cases}

    where `p` is the :code:`bias` in favor of a positive event

    Parameters
    ----------
    X : numpy.ndarray, int
        1D dataset which falls within the domain of the given distribution
        support. The Bernoulli distribution expects series of 0 or 1 only.
        This value is often denoted `k` in the literature.

    Returns
    -------
    numpy.ndarray
        The output log transformed probability mass reported elementwise
        with respect to the input data.
    """
    # NOTE: the docstring is a raw string so the LaTeX backslashes are not
    # interpreted as escape sequences (`\b`, `\t`, ...)

    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # defer to the mass function and take the elementwise logarithm
    return np.log(self.pmf(X))
def log_pdf(self, *X):
    """Log Probability Density Function

    Evaluates
    ``(alpha - 1) * log(X) + (beta - 1) * log(1 - X) - betaln(alpha, beta)``
    elementwise.

    Parameters
    ----------
    X : numpy.ndarray, float
        Observations. They are passed through ``check_array`` first
        (presumably coerced to a single 1D array -- confirm against that
        helper).

    Returns
    -------
    numpy.ndarray
        The log density reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    log_norm = sc.betaln(self.alpha, self.beta)

    # xlogy / xlog1py return 0 when the multiplier is 0, so the boundary
    # cases (alpha == 1 at X == 0, beta == 1 at X == 1) give a finite
    # value instead of the NaN produced by 0 * log(0)
    log_kernel = (sc.xlogy(self.alpha - 1, X)
                  + sc.xlog1py(self.beta - 1, -X))
    return log_kernel - log_norm
def partial_fit(self, X):
    """Incrementally update the rate estimate from a batch of data.

    The rate is kept as the sample-size weighted average of the
    NaN-ignoring mean of every batch seen so far.

    Parameters
    ----------
    X : numpy.ndarray
        Batch of observations; NaN entries are excluded from the sample
        count and from the mean.

    Returns
    -------
    self
        The updated estimator.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # a never-fitted estimator has not counted any samples yet
    if not hasattr(self, '_n_samples'):
        self._n_samples = 0

    is_first_batch = self.rate is None

    # statistics of the incoming batch (NaN values do not count)
    batch_size = X.shape[0] - np.isnan(X).sum()
    batch_rate = np.nanmean(X)

    seen = self._n_samples
    self._n_samples = seen + batch_size

    if is_first_batch:
        self.rate = batch_rate
    else:
        # weight the old and new rates by their sample counts
        weighted = (self.rate * seen) + (batch_rate * batch_size)
        self.rate = weighted / self._n_samples

    return self
def pdf(self, *X):
    """Probability Density Function

    Evaluates ``X**(alpha - 1) * (1 - X)**(beta - 1) / B(alpha, beta)``
    elementwise, where ``B`` is ``sc.beta``.

    Parameters
    ----------
    X : numpy.ndarray, float
        Observations. They are passed through ``check_array`` first
        (presumably coerced to a single 1D array -- confirm against that
        helper).

    Returns
    -------
    numpy.ndarray
        The density reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # normalizing constant of the density
    normalizer = sc.beta(self.alpha, self.beta)

    # unnormalized kernel: one power term per shape parameter
    left_term = np.power(X, self.alpha - 1)
    right_term = np.power(1 - X, self.beta - 1)

    return (left_term * right_term) / normalizer
def quantile(self, *q):
    """Quantile Function

    Linearly maps probabilities onto the interval between :code:`low` and
    the effective upper bound, which is :code:`high` when
    :code:`high_inclusive` is set and ``high - 1`` otherwise.

    Parameters
    ----------
    q : numpy.ndarray, float
        The probabilities to map.

    Returns
    -------
    numpy.ndarray
        ``low + q * (upper - low)`` reported elementwise.
    """
    # check array for numpy structure
    q = check_array(q, reduce_args=True, ensure_1d=True)

    # an exclusive upper bound shifts the last reachable value down by one
    upper = self.high if self.high_inclusive else (self.high - 1)
    return self.low + q * (upper - self.low)
def log_cdf(self, X):
    r"""Log Cumulative Distribution Function

    The cumulative distribution function for the Bernoulli distribution is
    given by three cases.

    .. math::

        \begin{cases}
            0 &\text{if } X \lt 0\\
            1 - p &\text{if } 0 \leq X \lt 1\\
            1 &\text{if } X \geq 1
        \end{cases}

    where `p` is the :code:`bias` in favor of a positive event

    Parameters
    ----------
    X : numpy.ndarray, int
        1D dataset which falls within the domain of the given distribution
        support. The Bernoulli distribution expects series of 0 or 1 only.
        This value is often denoted `k` in the literature.

    Returns
    -------
    numpy.ndarray
        The output log transformed cumulative distribution reported
        elementwise with respect to the input data.
    """
    # NOTE: the docstring is a raw string so the LaTeX backslashes are not
    # interpreted as escape sequences (`\b`, `\t`, ...)

    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # defer to the distribution function and take the elementwise logarithm
    return np.log(self.cdf(X))
def quantile(self, *q):
    """Quantile Function

    Inverts ``sc.pdtr`` for the configured :code:`rate`. The continuous
    inverse from ``sc.pdtrik`` is rounded up, then the next lower integer
    is returned instead whenever its cumulative probability already
    reaches `q`.

    Parameters
    ----------
    q : numpy.ndarray, float
        The probabilities to invert.

    Returns
    -------
    numpy.ndarray
        The integer-valued outcomes associated with the input quantiles.
    """
    # check array for numpy structure
    q = check_array(q, reduce_args=True, ensure_1d=True)

    # candidate above the continuous solution, and its lower neighbour
    # (never below zero)
    upper = np.ceil(sc.pdtrik(q, self.rate))
    lower = np.maximum(upper - 1, 0)

    # keep the lower neighbour when it already covers the requested mass
    lower_cdf = sc.pdtr(lower, self.rate)
    return np.where(lower_cdf >= q, lower, upper)
def log_pdf(self, *X):
    """Log Probability Density Function

    Evaluates
    ``shape * log(rate) + (shape - 1) * log(X) - rate * X - gammaln(shape)``
    elementwise.

    Parameters
    ----------
    X : numpy.ndarray, float
        Observations. They are passed through ``check_array`` first
        (presumably coerced to a single 1D array -- confirm against that
        helper).

    Returns
    -------
    numpy.ndarray
        The log density reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # alias parameters
    a, b = self.shape, self.rate

    # xlogy returns 0 when the multiplier is 0, so shape == 1 evaluated at
    # X == 0 gives log(rate) instead of the NaN produced by 0 * log(0)
    return a * np.log(b) + sc.xlogy(a - 1, X) - b * X - sc.gammaln(a)
def cdf(self, *X):
    """Cumulative Distribution Function

    Evaluates ``sc.bdtr`` at the floored observations for the configured
    :code:`n_trials` and :code:`bias`.

    Parameters
    ----------
    X : numpy.ndarray
        Observations; fractional values are rounded down first.

    Returns
    -------
    numpy.ndarray
        The cumulative probability reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # only whole counts carry mass, so evaluate at the integer below X
    whole_counts = np.floor(X)
    return sc.bdtr(whole_counts, self.n_trials, self.bias)
def cdf(self, *X):
    """Cumulative Distribution Function

    Evaluates ``sc.gammainc(shape, rate * X)`` elementwise.

    Parameters
    ----------
    X : numpy.ndarray, float
        Observations. They are passed through ``check_array`` first
        (presumably coerced to a single 1D array -- confirm against that
        helper).

    Returns
    -------
    numpy.ndarray
        The cumulative probability reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    shape, rate = self.shape, self.rate

    # rescale the observations by the rate before applying the
    # incomplete gamma function
    rate_scaled = rate * X
    return sc.gammainc(shape, rate_scaled)
def partial_fit(self, X):
    """Incrementally update the estimator from a batch of data.

    Delegates the actual fitting to the parent class, then derives
    :code:`dof` as twice the fitted :code:`shape`, rounded to the nearest
    integer.

    Parameters
    ----------
    X : numpy.ndarray
        Batch of observations.

    Returns
    -------
    self
        The updated estimator.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # let the parent class update its own parameters first
    super(ChiSquared, self).partial_fit(X)

    # degrees of freedom follow from the freshly fitted shape
    doubled_shape = 2 * self.shape
    self.dof = np.round(doubled_shape).astype(int)
    return self
def cdf(self, *X):
    """Cumulative Distribution Function

    Computes ``(X - low) / (upper - low)`` clipped to ``[0, 1]``, where the
    effective upper bound is :code:`high` when :code:`high_inclusive` is
    set and ``high - 1`` otherwise.

    Parameters
    ----------
    X : numpy.ndarray, int
        Observations; requested from ``check_array`` with ``dtype=int``
        and floored afterwards.

    Returns
    -------
    numpy.ndarray
        The cumulative probability reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True, dtype=int)
    X = np.floor(X)

    # an exclusive upper bound shifts the last reachable value down by one
    upper = self.high if self.high_inclusive else (self.high - 1)

    fraction = (X - self.low) / (upper - self.low)
    return np.clip(fraction, 0, 1)
def cdf(self, *X):
    """Cumulative Distribution Function

    Evaluates ``sc.betainc(n_success, floor(X) + 1, bias)`` elementwise.

    Parameters
    ----------
    X : numpy.ndarray
        Observations; fractional values are rounded down first.

    Returns
    -------
    numpy.ndarray
        The cumulative probability reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # only whole counts carry mass, so evaluate at the integer below X
    whole_counts = np.floor(X)
    return sc.betainc(self.n_success, whole_counts + 1, self.bias)
def quantile(self, *q):
    """Quantile Function

    Evaluates ``sc.gammaincinv(shape, q) / rate`` elementwise.

    Parameters
    ----------
    q : numpy.ndarray, float
        The probabilities to invert.

    Returns
    -------
    numpy.ndarray
        The `X` values associated with the input quantiles.
    """
    # check array for numpy structure
    q = check_array(q, reduce_args=True, ensure_1d=True)

    shape, rate = self.shape, self.rate

    # invert the incomplete gamma function, then undo the rate scaling
    unit_rate_quantile = sc.gammaincinv(shape, q)
    return unit_rate_quantile / rate
def quantile(self, *q):
    """Quantile Function

    Computes ``ceil(log(1 - q) / log(1 - bias)) - 1`` elementwise and
    replaces every result that ``self.support.contains`` rejects with NaN.

    Parameters
    ----------
    q : numpy.ndarray, float
        The probabilities to invert.

    Returns
    -------
    numpy.ndarray
        The `X` values associated with the input quantiles, or NaN where
        the computed value is outside of the support.
    """
    # check array for numpy structure
    q = check_array(q, reduce_args=True, ensure_1d=True)

    # ratio of log-survival terms, rounded up and shifted down by one
    log_ratio = np.log(1 - q) / np.log(1 - self.bias)
    out = np.ceil(log_ratio) - 1

    # return safely with bounds check
    in_support = self.support.contains(out)
    return np.where(in_support, out, np.nan)
def pmf(self, *X):
    """Probability Mass Function

    Exponentiates the result of :code:`log_pmf` for the given observations.

    Parameters
    ----------
    X : numpy.ndarray
        Observations; requested from ``check_array`` as an at-least-2D
        array with ``feature_axis=0``.

    Returns
    -------
    numpy.ndarray
        ``exp(log_pmf(X))``.
    """
    # check array for numpy structure
    # NOTE: feature_axis is set to rows so that *args representing a single
    # observation end up with the correct shape. Otherwise, users *have* to
    # pass the correct shape for multiple observations (which does not
    # affect the final shape)
    X = check_array(X, reduce_args=True, atleast_2d=True, feature_axis=0)

    log_mass = self.log_pmf(X)
    return np.exp(log_mass)
def log_pdf(self, *X):
    """Log Probability Density Function

    Returns ``-log(high - low)`` for observations inside the interval and
    ``-inf`` outside of it. The lower bound is always inclusive; the upper
    bound is inclusive only when :code:`high_inclusive` is set.

    Parameters
    ----------
    X : numpy.ndarray, float
        Observations. They are passed through ``check_array`` first
        (presumably coerced to a single 1D array -- confirm against that
        helper).

    Returns
    -------
    numpy.ndarray
        The log density reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    above_low = self.low <= X
    if self.high_inclusive:
        below_high = self.high >= X
    else:
        below_high = self.high > X
    inside = np.logical_and(above_low, below_high)

    # select the two possible values directly rather than taking the log
    # of a boolean mask: np.log on booleans produces a float16 array and
    # emits a divide-by-zero warning for every value outside the interval
    return np.where(inside, -np.log(self.high - self.low), -np.inf)
def partial_fit(self, X):
    """Incrementally update the shape and rate estimates from a batch.

    Running estimates of the mean and of the mean of ``log(X)`` are kept as
    sample-size weighted averages over all batches. The shape is obtained
    from a closed-form approximation refined by one Newton-Raphson step,
    and the rate follows as ``shape / mean``.

    Parameters
    ----------
    X : numpy.ndarray, float
        Batch of observations; NaN entries are excluded from the sample
        count and from both means.

    Returns
    -------
    self
        The updated estimator.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # first fit
    if not hasattr(self, "_n_samples"):
        self._n_samples = 0

    # update mean
    if self.shape is None and self.rate is None:
        self._n_samples += X.shape[0] - np.isnan(X).sum()
        self._mean = np.nanmean(X)
        self._log_mean = np.nanmean(np.log(X))
    else:
        # previous values
        prev_size = self._n_samples
        prev_mean = self._mean
        prev_log_mean = self._log_mean

        # current values
        curr_size = X.shape[0] - np.isnan(X).sum()
        curr_mean = np.nanmean(X)
        curr_log_mean = np.nanmean(np.log(X))

        # update size
        self._n_samples = prev_size + curr_size

        # update mean
        self._mean = ((prev_mean * prev_size) +
                      (curr_mean * curr_size)) / self._n_samples

        # update log-mean
        self._log_mean = ((prev_log_mean * prev_size) +
                          (curr_log_mean * curr_size)) / self._n_samples

    # solving for the shape parameter has no analytical closed form
    # solution, however shape is numerically well behaved. Below we
    # estimate parameter `s`, which aids in the estimation of shape `k`.
    s = np.log(self._mean) - self._log_mean
    k = (3 - s + np.sqrt((s - 3) * (s - 3) + 24 * s)) / (12 * s)

    # refine the approximation with one explicit Newton-Raphson step on
    # f(k) = log(k) - psi(k) - s. Its derivative is 1 / k - psi'(k), where
    # psi' is the trigamma function polygamma(1, k) -- not psi itself.
    k -= (np.log(k) - sc.psi(k) - s) / ((1 / k) - sc.polygamma(1, k))

    # solve for theta (theta = 1 / self.rate)
    theta = self._mean / k

    # update parameters
    self.shape = k
    self.rate = 1 / theta
    return self
def log_pmf(self, *X):
    """Log Probability Mass Function

    Returns ``-log(nrange)`` for observations inside the interval and
    ``-inf`` outside of it, where ``nrange`` is ``high - low`` when
    :code:`high_inclusive` is set and ``(high - 1) - low`` otherwise. The
    lower bound is always inclusive.

    Parameters
    ----------
    X : numpy.ndarray, int
        Observations; requested from ``check_array`` with ``dtype=int``.

    Returns
    -------
    numpy.ndarray
        The log probability mass reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True, dtype=int)

    above_low = self.low <= X
    if self.high_inclusive:
        below_high = self.high >= X
        nrange = self.high - self.low
    else:
        below_high = self.high > X
        nrange = (self.high - 1) - self.low
    inside = np.logical_and(above_low, below_high)

    # select the two possible values directly rather than taking the log
    # of a boolean mask: np.log on booleans produces a float16 array and
    # emits a divide-by-zero warning for every value outside the interval
    return np.where(inside, -np.log(nrange), -np.inf)
def log_pmf(self, *X):
    """Log Probability Mass Function

    Evaluates the logarithm of
    ``C(k + X - 1, X) * bias**k * (1 - bias)**X`` elementwise, where `k` is
    :code:`n_success` and `X` is floored to a whole count. This is the
    mass function whose cumulative sum is ``betainc(k, X + 1, bias)``.

    Parameters
    ----------
    X : numpy.ndarray
        Observations; fractional values are rounded down first.

    Returns
    -------
    numpy.ndarray
        The log probability mass reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # floor values of X
    X = np.floor(X)

    # alias n_success
    k = self.n_success

    # log of the binomial coefficient C(k + X - 1, X), written with gamma
    # functions as Gamma(k + X) / (Gamma(X + 1) * Gamma(k)). Using
    # Gamma(k + 1) * Gamma(X) instead would scale every mass by X / k and
    # assign zero probability to X == 0.
    log_coef = sc.gammaln(k + X) - (sc.gammaln(X + 1) + sc.gammaln(k))

    out = (log_coef
           + X * nanlog(1 - self.bias)
           + k * nanlog(self.bias))
    return out
def log_pmf(self, *X):
    """Log Probability Mass Function

    Evaluates the logarithm of
    ``C(n_trials, X) * bias**X * (1 - bias)**(n_trials - X)`` elementwise,
    with `X` floored to a whole count and the binomial coefficient written
    in terms of ``gammaln``.

    Parameters
    ----------
    X : numpy.ndarray
        Observations; fractional values are rounded down first.

    Returns
    -------
    numpy.ndarray
        The log probability mass reported elementwise.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # only whole counts carry mass
    k = np.floor(X)
    n, p = self.n_trials, self.bias

    # log of the binomial coefficient C(n, k)
    log_choose = (sc.gammaln(n + 1)
                  - (sc.gammaln(k + 1) + sc.gammaln(n - k + 1)))

    # xlogy / xlog1py keep the 0 * log(0) corner cases at zero
    return log_choose + sc.xlogy(k, p) + sc.xlog1py(n - k, -p)
def partial_fit(self, X):
    """Incrementally update bias and n_trials from a batch of data.

    Negative observations are replaced with NaN and ignored. Running
    estimates of the mean and variance are kept as sample-size weighted
    averages of the per-batch values. The parameters are then set to
    ``bias = 1 - variance / mean`` and ``n_trials = round(mean / bias)``.

    Parameters
    ----------
    X : numpy.ndarray
        Batch of observations; converted to float so that invalid entries
        can be masked with NaN.

    Returns
    -------
    self
        The updated estimator.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True).astype(float)

    # NOTE: the "true" upper bound is unknown at this point, so only
    # negative values can be rejected as outside of the support
    X[X < 0] = np.nan

    # a never-fitted estimator has not counted any samples yet
    if not hasattr(self, "_n_samples"):
        self._n_samples = 0

    is_first_batch = self._mean is None and self._variance is None

    # statistics of the incoming batch (NaN values do not count)
    batch_size = X.shape[0] - np.isnan(X).sum()
    batch_mean = np.nanmean(X)
    batch_variance = np.nanvar(X)

    seen = self._n_samples
    self._n_samples = seen + batch_size

    if is_first_batch:
        self._mean = batch_mean
        self._variance = batch_variance
    else:
        # weight the old and new moments by their sample counts
        self._mean = (
            (self._mean * seen) + (batch_mean * batch_size)
        ) / self._n_samples
        self._variance = (
            (self._variance * seen) + (batch_variance * batch_size)
        ) / self._n_samples

    # derive the distribution parameters from the two moments
    self.bias = 1 - (self._variance / self._mean)
    self.n_trials = np.round(self._mean / self.bias).astype(int)
    return self
def quantile(self, *q):
    """Quantile Function

    Inverts ``sc.nbdtr`` for the configured :code:`n_success` and
    :code:`bias`. The continuous inverse from ``sc.nbdtrik`` is rounded up,
    then the next lower integer is used instead whenever its cumulative
    probability already reaches `q`. Results rejected by
    ``self.support.contains`` are replaced with NaN.

    Parameters
    ----------
    q : numpy.ndarray, float
        The probabilities to invert.

    Returns
    -------
    numpy.ndarray
        The outcomes associated with the input quantiles, or NaN where the
        computed value is outside of the support.
    """
    # check array for numpy structure
    q = check_array(q, reduce_args=True, ensure_1d=True)

    # candidate above the continuous solution, and its lower neighbour
    # (never below zero)
    ceil_k = np.ceil(sc.nbdtrik(q, self.n_success, self.bias))
    floor_k = np.maximum(ceil_k - 1, 0)

    # keep the lower neighbour when it already covers the requested mass
    floor_cdf = sc.nbdtr(floor_k, self.n_success, self.bias)
    candidate = np.where(floor_cdf >= q, floor_k, ceil_k).astype(int)

    # return only in-bound values
    in_support = self.support.contains(candidate)
    return np.where(in_support, candidate, np.nan)
def partial_fit(self, X):
    """Incrementally widen the bounds to cover a batch of data.

    :code:`low` becomes the smallest and :code:`high` the largest value
    seen so far, ignoring NaN entries. Each bound is handled on its own,
    so an estimator created with only one bound preset can still be fitted.

    Parameters
    ----------
    X : numpy.ndarray
        Batch of observations.

    Returns
    -------
    self
        The updated estimator.
    """
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    curr_low, curr_high = np.nanmin(X), np.nanmax(X)

    # an unset bound is taken straight from the data; comparing against
    # None would raise a TypeError when only one bound has been preset
    if self.low is None or curr_low < self.low:
        self.low = curr_low
    if self.high is None or curr_high > self.high:
        self.high = curr_high

    return self