Ejemplo n.º 1
0
    def log_pdf(self, *X):
        """Log Probability Density Function

        Evaluates, elementwise, the log of the density

        .. code-block:: text

            sqrt(2) / (scale * sqrt(pi)) * exp(-X**2 / (2 * scale**2))

        for non-negative `X`.

        Parameters
        ----------
        X : numpy.ndarray, float
            Observations; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The log density for :code:`X >= 0` and :code:`-inf` elsewhere.
        """
        # check array for numpy structure
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # log of the normalizing constant sqrt(2) / (scale * sqrt(pi))
        norm = np.log(np.sqrt(2)) - np.log(self.scale * np.sqrt(np.pi))

        # the exponent is parameterized by the scale (matching `pdf`), not by
        # the variance of the distribution itself
        p = norm - (X**2 / (2 * self.scale**2))

        # a density of zero outside the support has a log of -inf
        return np.where(X >= 0, p, -np.inf)
Ejemplo n.º 2
0
    def pdf(self, *X):
        """Probability Density Function

        Evaluates, elementwise,
        :code:`sqrt(2) / (scale * sqrt(pi)) * exp(-X**2 / (2 * scale**2))`.

        Parameters
        ----------
        X : numpy.ndarray, float
            Observations; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The density where :code:`X > 0` and 0 elsewhere.
        """
        # coerce input through the shared array validator
        X = check_array(X, reduce_args=True, ensure_1d=True)

        sigma = self.scale

        # normalizing constant followed by the gaussian kernel
        coefficient = np.sqrt(2) / (sigma * np.sqrt(np.pi))
        density = coefficient * np.exp(-X**2 / (2 * sigma**2))

        # strictly positive values keep their density, everything else is 0
        return np.where(X > 0, density, 0)
Ejemplo n.º 3
0
    def partial_fit(self, X):
        """Incrementally update the scale estimate from a batch of data.

        The running empirical variance is combined with the NaN-aware variance
        of the new batch, weighted by the number of non-NaN samples on each
        side. The scale is then derived as
        :code:`sqrt(variance / (1 - 2 / pi))`.

        Parameters
        ----------
        X : numpy.ndarray, float
            New observations; NaN entries are excluded from the counts and
            from the batch variance.

        Returns
        -------
        self
            The updated instance.
        """
        # coerce input through the shared array validator
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # lazily create the running sample counter
        if not hasattr(self, '_n_samples'):
            self._n_samples = 0

        # statistics of the incoming batch (NaN entries are ignored)
        n_valid = X.shape[0] - np.isnan(X).sum()
        batch_variance = np.nanvar(X)

        if self._empirical_variance is None:
            # nothing accumulated yet: adopt the batch statistics directly
            self._n_samples += n_valid
            self._empirical_variance = batch_variance
        else:
            # NOTE(review): this size-weighted average of per-batch variances
            # does not account for differences between batch means -- confirm
            # that this is intended.
            n_before = self._n_samples
            variance_before = self._empirical_variance

            self._n_samples = n_before + n_valid
            self._empirical_variance = (
                (variance_before * n_before) +
                (batch_variance * n_valid)) / self._n_samples

        # convert the empirical variance into the scale parameter
        variance_ratio = (1 - (2 / np.pi))
        raw_scale = np.sqrt(self._empirical_variance / variance_ratio)
        self.scale = _handle_zeros_in_scale(raw_scale)
        return self
Ejemplo n.º 4
0
    def quantile(self, *q):
        r"""Quantile Function

        Also known as the inverse cumulative distribution function, this
        function takes known quantiles and returns the associated `X` value
        from the support domain.

        .. math::
            \begin{cases}
                0      &\text{if } 0 \leq q \lt p\\
                1      &\text{if } p \leq q \lt 1
            \end{cases}

        NOTE(review): the implementation returns 0 whenever
        :code:`bias >= q` and otherwise defers to :code:`scipy.special.bdtrik`
        with a single trial; confirm the thresholds above against that.

        Parameters
        ----------
        q : numpy.ndarray, float
            The probabilities within domain [0, 1]

        Returns
        -------
        numpy.ndarray
            The `X` values from the support domain associated with the input
            quantiles.
        """
        # check array for numpy structure
        q = check_array(q, reduce_args=True, ensure_1d=True)

        # invert the single-trial binomial CDF and round up to an integer
        out = np.ceil(sc.bdtrik(q, 1, self.bias))

        # quantiles not exceeding the bias map to 0
        return np.where(self.bias >= q, 0, out)
Ejemplo n.º 5
0
    def log_pdf(self, *X):
        """Log Probability Density Function

        Evaluates, elementwise,
        :code:`-(X - center)**2 / (2 * variance) - log(scale * sqrt(2 * pi))`.

        Parameters
        ----------
        X : numpy.ndarray, float
            Observations; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The log density reported elementwise.
        """
        # coerce input through the shared array validator
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # squared distance from the center, scaled by twice the variance
        deviation = X - self.center
        twice_variance = 2 * self.variance

        # log of the normalizing constant scale * sqrt(2 * pi)
        log_normalizer = np.log(self.scale) + np.log(np.sqrt(2 * np.pi))

        return -(deviation**2) / twice_variance - log_normalizer
Ejemplo n.º 6
0
    def partial_fit(self, X):
        """Incrementally update the bias estimate from a batch of data.

        Values rejected by :code:`self.support` are replaced with NaN and
        ignored. The running mean is updated as a sample-size weighted average
        and the bias is set to the reciprocal of that mean.

        Parameters
        ----------
        X : numpy.ndarray
            New observations; converted to a float array before masking.

        Returns
        -------
        self
            The updated instance.
        """
        # float copy so that invalid entries can be overwritten with NaN
        X = check_array(X, reduce_args=True, ensure_1d=True).astype(float)

        # mask every observation that falls outside of the support
        outside_support = self.support.not_contains(X)
        X[outside_support] = np.nan

        # lazily create the running sample counter
        if not hasattr(self, "_n_samples"):
            self._n_samples = 0

        # statistics of the incoming batch (NaN entries are ignored)
        n_valid = X.shape[0] - np.isnan(X).sum()
        batch_mean = np.nanmean(X)

        if self._mean is None:
            # nothing accumulated yet: adopt the batch statistics directly
            self._n_samples += n_valid
            self._mean = batch_mean
        else:
            # blend the stored mean with the batch mean by sample count
            n_before = self._n_samples
            mean_before = self._mean

            self._n_samples = n_before + n_valid
            self._mean = ((mean_before * n_before) +
                          (batch_mean * n_valid)) / self._n_samples

        # the bias is estimated as the reciprocal of the running mean
        self.bias = 1 / self._mean
        return self
Ejemplo n.º 7
0
    def log_pmf(self, *X):
        r"""Log Probability Mass Function

        The probability mass function for the Bernoulli distribution is given
        by two cases.

        .. math::
            \begin{cases}
                1-p  &\text{if } X = 0\\
                p    &\text{if } X = 1
            \end{cases}

        where `p` is the :code:`bias` in favor of a positive event. This
        method returns the natural log of :code:`self.pmf(X)`.

        Parameters
        ----------
        X : numpy.ndarray, int
            1D dataset which falls within the domain of the given distribution
            support. The Bernoulli distribution expects series of 0 or 1 only.
            This value is often denoted `k` in the literature.

        Returns
        -------
        numpy.ndarray
            The output log transformed probability mass reported elementwise
            with respect to the input data.
        """

        # check array for numpy structure
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # log-transform the mass computed by `pmf`
        return np.log(self.pmf(X))
Ejemplo n.º 8
0
    def log_pdf(self, *X):
        """Log Probability Density Function

        Evaluates, elementwise,
        :code:`(alpha - 1) * log(X) + (beta - 1) * log(1 - X) - betaln(alpha, beta)`.

        Parameters
        ----------
        X : numpy.ndarray, float
            Observations; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The log density reported elementwise.
        """
        # check array for numpy structure
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # log of the normalizing beta function
        norm = sc.betaln(self.alpha, self.beta)

        # xlogy / xlog1py return 0 when the multiplier is 0, which avoids the
        # `0 * -inf = nan` produced by a plain product at X = 0 or X = 1 when
        # the corresponding shape parameter equals 1
        p = (sc.xlogy(self.alpha - 1, X) +
             sc.xlog1py(self.beta - 1, -X))
        return p - norm
Ejemplo n.º 9
0
    def partial_fit(self, X):
        """Incrementally update the rate estimate from a batch of data.

        The rate is maintained as a running mean: the stored rate and the
        NaN-aware mean of the new batch are averaged, weighted by the number
        of non-NaN samples behind each.

        Parameters
        ----------
        X : numpy.ndarray
            New observations; NaN entries are excluded from the counts and
            from the batch mean.

        Returns
        -------
        self
            The updated instance.
        """
        # coerce input through the shared array validator
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # lazily create the running sample counter
        if not hasattr(self, '_n_samples'):
            self._n_samples = 0

        # statistics of the incoming batch (NaN entries are ignored)
        n_valid = X.shape[0] - np.isnan(X).sum()
        batch_rate = np.nanmean(X)

        if self.rate is None:
            # no rate yet: adopt the batch mean directly
            self._n_samples += n_valid
            self.rate = batch_rate
        else:
            # blend the stored rate with the batch mean by sample count
            n_before = self._n_samples
            rate_before = self.rate

            self._n_samples = n_before + n_valid
            self.rate = ((rate_before * n_before) +
                         (batch_rate * n_valid)) / self._n_samples

        return self
Ejemplo n.º 10
0
    def pdf(self, *X):
        """Probability Density Function

        Evaluates, elementwise,
        :code:`X**(alpha - 1) * (1 - X)**(beta - 1) / B(alpha, beta)`.

        Parameters
        ----------
        X : numpy.ndarray, float
            Observations; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The density reported elementwise.
        """
        # coerce input through the shared array validator
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # normalizing beta function
        beta_fn = sc.beta(self.alpha, self.beta)

        # unnormalized kernel built from the two power terms
        left_term = np.power(X, self.alpha - 1)
        right_term = np.power(1 - X, self.beta - 1)
        kernel = left_term * right_term

        return kernel / beta_fn
Ejemplo n.º 11
0
    def quantile(self, *q):
        """Quantile Function

        Linearly maps the probabilities `q` onto the interval starting at
        :code:`low`. The interval ends at :code:`high` when
        :code:`high_inclusive` is set and at :code:`high - 1` otherwise.

        Parameters
        ----------
        q : numpy.ndarray, float
            Probabilities; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            :code:`low + q * (upper - low)` reported elementwise.
        """
        # coerce input through the shared array validator
        q = check_array(q, reduce_args=True, ensure_1d=True)

        # an exclusive upper bound shortens the interval by one
        upper = self.high if self.high_inclusive else (self.high - 1)
        return self.low + q * (upper - self.low)
Ejemplo n.º 12
0
    def log_cdf(self, X):
        r"""Log Cumulative Distribution Function

        The cumulative distribution function for the Bernoulli distribution is
        given by three cases.

        .. math::
            \begin{cases}
                0      &\text{if } X \lt 0\\
                1 - p  &\text{if } 0 \leq X \lt 1\\
                1      &\text{if } X \geq 1
            \end{cases}

        where `p` is the :code:`bias` in favor of a positive event. This
        method returns the natural log of :code:`self.cdf(X)`.

        Parameters
        ----------
        X : numpy.ndarray, int
            1D dataset which falls within the domain of the given distribution
            support. The Bernoulli distribution expects series of 0 or 1 only.
            This value is often denoted `k` in the literature.

        Returns
        -------
        numpy.ndarray
            The output log transformed cumulative distribution reported
            elementwise with respect to the input data.
        """

        # check array for numpy structure
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # log-transform the cumulative probability computed by `cdf`
        return np.log(self.cdf(X))
Ejemplo n.º 13
0
    def quantile(self, *q):
        """Quantile Function

        Inverts :code:`scipy.special.pdtr` for the stored rate. The real-valued
        inverse is rounded up, and the next lower count is returned instead
        whenever its cumulative probability already reaches `q`.

        Parameters
        ----------
        q : numpy.ndarray, float
            Probabilities; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The counts associated with the input quantiles.
        """
        # coerce input through the shared array validator
        q = check_array(q, reduce_args=True, ensure_1d=True)

        # candidate count from the continuous inverse, rounded up
        upper = np.ceil(sc.pdtrik(q, self.rate))

        # neighbouring lower candidate, never below zero
        lower = np.maximum(upper - 1, 0)

        # prefer the lower candidate when it already covers q
        lower_cdf = sc.pdtr(lower, self.rate)
        return np.where(lower_cdf >= q, lower, upper)
Ejemplo n.º 14
0
    def log_pdf(self, *X):
        """Log Probability Density Function

        Evaluates, elementwise,
        :code:`a * log(b) + (a - 1) * log(X) - b * X - gammaln(a)` where `a`
        is the shape and `b` is the rate.

        Parameters
        ----------
        X : numpy.ndarray, float
            Observations; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The log density reported elementwise.
        """
        # check array for numpy structure
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # alias parameters
        a, b = self.shape, self.rate

        # xlogy returns 0 when its multiplier is 0, so a shape of exactly 1
        # yields log(b) at X = 0 instead of the nan from `0 * log(0)`
        return a * np.log(b) + sc.xlogy(a - 1, X) - b * X - sc.gammaln(a)
Ejemplo n.º 15
0
    def cdf(self, *X):
        """Cumulative Distribution Function

        Floors the input and evaluates :code:`scipy.special.bdtr` with the
        stored number of trials and bias.

        Parameters
        ----------
        X : numpy.ndarray
            Observations; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The cumulative probability reported elementwise.
        """
        # validate, then snap every value down to the nearest integer
        floored = np.floor(
            check_array(X, reduce_args=True, ensure_1d=True))

        return sc.bdtr(floored, self.n_trials, self.bias)
Ejemplo n.º 16
0
    def cdf(self, *X):
        """Cumulative Distribution Function

        Evaluates the regularized lower incomplete gamma function
        :code:`scipy.special.gammainc(shape, rate * X)` elementwise.

        Parameters
        ----------
        X : numpy.ndarray, float
            Observations; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The cumulative probability reported elementwise.
        """
        # coerce input through the shared array validator
        X = check_array(X, reduce_args=True, ensure_1d=True)

        shape, rate = self.shape, self.rate

        # rescale the observations by the rate before evaluating
        rescaled = rate * X
        return sc.gammainc(shape, rescaled)
Ejemplo n.º 17
0
    def partial_fit(self, X):
        """Incrementally fit the parent model, then refresh :code:`dof`.

        The parent class performs the actual update; afterwards the degrees of
        freedom are set to twice the fitted shape, rounded to the nearest
        integer.

        Parameters
        ----------
        X : numpy.ndarray
            New observations; passed through :code:`check_array` before use.

        Returns
        -------
        self
            The updated instance.
        """
        # coerce input through the shared array validator
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # delegate the parameter update to the parent implementation
        super(ChiSquared, self).partial_fit(X)

        # degrees of freedom are twice the shape, stored as an integer
        doubled_shape = 2 * self.shape
        self.dof = np.round(doubled_shape).astype(int)
        return self
Ejemplo n.º 18
0
    def cdf(self, *X):
        """Cumulative Distribution Function

        Evaluates :code:`(X - low) / (upper - low)` clipped to [0, 1], where
        `upper` is :code:`high` when :code:`high_inclusive` is set and
        :code:`high - 1` otherwise.

        NOTE(review): the denominator counts the width of the interval rather
        than the number of integer points in it -- confirm this is intended.

        Parameters
        ----------
        X : numpy.ndarray, int
            Observations; converted with :code:`dtype=int` and floored.

        Returns
        -------
        numpy.ndarray
            The cumulative probability reported elementwise.
        """
        # validate as integers, then floor
        X = check_array(X, reduce_args=True, ensure_1d=True, dtype=int)
        X = np.floor(X)

        # an exclusive upper bound shortens the interval by one
        upper = self.high if self.high_inclusive else (self.high - 1)

        return np.clip((X - self.low) / (upper - self.low), 0, 1)
Ejemplo n.º 19
0
    def cdf(self, *X):
        """Cumulative Distribution Function

        Floors the input and evaluates the regularized incomplete beta
        function :code:`scipy.special.betainc(n_success, X + 1, bias)`.

        Parameters
        ----------
        X : numpy.ndarray
            Observations; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The cumulative probability reported elementwise.
        """
        # validate, then snap every value down to the nearest integer
        floored = np.floor(
            check_array(X, reduce_args=True, ensure_1d=True))

        return sc.betainc(self.n_success, floored + 1, self.bias)
Ejemplo n.º 20
0
    def quantile(self, *q):
        """Quantile Function

        Inverts the regularized lower incomplete gamma function with
        :code:`scipy.special.gammaincinv(shape, q)` and divides the result by
        the rate.

        Parameters
        ----------
        q : numpy.ndarray, float
            Probabilities; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The values associated with the input quantiles.
        """
        # coerce input through the shared array validator
        q = check_array(q, reduce_args=True, ensure_1d=True)

        shape, rate = self.shape, self.rate

        # invert for a unit rate first, then rescale
        unit_rate_quantile = sc.gammaincinv(shape, q)
        return unit_rate_quantile / rate
Ejemplo n.º 21
0
    def quantile(self, *q):
        """Quantile Function

        Evaluates :code:`ceil(log(1 - q) / log(1 - bias)) - 1` elementwise and
        replaces results rejected by :code:`self.support` with NaN.

        Parameters
        ----------
        q : numpy.ndarray, float
            Probabilities; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The values associated with the input quantiles, NaN where the
            result is outside of the support.
        """
        # coerce input through the shared array validator
        q = check_array(q, reduce_args=True, ensure_1d=True)

        # closed-form inverse of the CDF
        log_upper_tail = np.log(1 - q)
        log_miss = np.log(1 - self.bias)
        result = np.ceil(log_upper_tail / log_miss) - 1

        # keep only the results that the support accepts
        in_support = self.support.contains(result)
        return np.where(in_support, result, np.nan)
Ejemplo n.º 22
0
    def pmf(self, *X):
        """Probability Mass Function

        Exponentiates the result of :code:`self.log_pmf`.

        Parameters
        ----------
        X : numpy.ndarray
            Observations; validated as an at-least-2D array.

        Returns
        -------
        numpy.ndarray
            :code:`exp(log_pmf(X))`.
        """
        # NOTE: feature_axis is set to rows so that *args representing a
        # single observation end up with the correct shape. Otherwise users
        # *have* to pass the correct shape for multiple observations (which
        # does not affect the final shape).
        X = check_array(X, reduce_args=True, atleast_2d=True, feature_axis=0)

        log_mass = self.log_pmf(X)
        return np.exp(log_mass)
Ejemplo n.º 23
0
    def log_pdf(self, *X):
        """Log Probability Density Function

        Returns :code:`-log(high - low)` for values inside the bounds and
        :code:`-inf` for values outside of them. The upper bound is inclusive
        only when :code:`high_inclusive` is set.

        Parameters
        ----------
        X : numpy.ndarray, float
            Observations; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The log density reported elementwise.
        """
        # coerce input through the shared array validator
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # membership tests against each bound
        above_low = self.low <= X
        below_high = (self.high >= X) if self.high_inclusive else (self.high > X)

        # log of the 0/1 indicator is 0 inside and -inf outside
        log_indicator = np.log(above_low * below_high)
        return log_indicator - np.log(self.high - self.low)
Ejemplo n.º 24
0
    def partial_fit(self, X):
        """Incrementally update the shape and rate from a batch of data.

        Running estimates of the mean and of the mean of the logs are kept as
        sample-size weighted averages. From these the shape is approximated in
        closed form and refined with a single Newton-Raphson step; the rate
        follows as :code:`shape / mean`.

        Parameters
        ----------
        X : numpy.ndarray, float
            New observations; NaN entries are excluded from the counts and
            from the batch statistics.

        Returns
        -------
        self
            The updated instance.
        """

        # check array for numpy structure
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # first fit
        if not hasattr(self, "_n_samples"):
            self._n_samples = 0

        # update mean
        if self.shape is None and self.rate is None:
            self._n_samples += X.shape[0] - np.isnan(X).sum()
            self._mean = np.nanmean(X)
            self._log_mean = np.nanmean(np.log(X))
        else:
            # previous values
            prev_size = self._n_samples
            prev_mean = self._mean
            prev_log_mean = self._log_mean

            # current values
            curr_size = X.shape[0] - np.isnan(X).sum()
            curr_mean = np.nanmean(X)
            curr_log_mean = np.nanmean(np.log(X))

            # update size
            self._n_samples = prev_size + curr_size

            # update mean
            self._mean = ((prev_mean * prev_size) +
                          (curr_mean * curr_size)) / self._n_samples

            # update log-mean
            self._log_mean = ((prev_log_mean * prev_size) +
                              (curr_log_mean * curr_size)) / self._n_samples

        # solving for shape parameter has no analytical closed form solution
        # however shape is numerically well behaved and can be computed with
        # some level of numerical stability. Below we estimate parameter `s`
        # which aids in the estimation of shape parameter `k`.
        s = np.log(self._mean) - self._log_mean
        k = (3 - s + np.sqrt((s - 3) * (s - 3) + 24 * s)) / (12 * s)

        # this estimation of k is within 1.5% of correct value; refine it with
        # one Newton-Raphson step on f(k) = log(k) - psi(k) - s. The derivative
        # is f'(k) = 1 / k - psi'(k), where psi' is the trigamma function
        # (polygamma of order 1), not the digamma function psi itself.
        k -= (np.log(k) - sc.psi(k) - s) / ((1 / k) - sc.polygamma(1, k))

        # solve for theta (theta = 1 / self.rate)
        theta = self._mean / k

        # update parameters
        self.shape = k
        self.rate = 1 / theta
        return self
Ejemplo n.º 25
0
    def log_pmf(self, *X):
        """Log Probability Mass Function

        Returns :code:`-log(upper - low)` for values inside the bounds and
        :code:`-inf` for values outside of them. `upper` is :code:`high` with
        an inclusive upper bound and :code:`high - 1` otherwise.

        NOTE(review): the normalizer is the width of the interval rather than
        the number of integer points in it -- confirm this is intended.

        Parameters
        ----------
        X : numpy.ndarray, int
            Observations; converted with :code:`dtype=int`.

        Returns
        -------
        numpy.ndarray
            The log mass reported elementwise.
        """
        # coerce input to an integer array
        X = check_array(X, reduce_args=True, ensure_1d=True, dtype=int)

        above_low = self.low <= X

        # the upper-bound test and the normalizing width depend on whether
        # the upper bound itself belongs to the support
        if self.high_inclusive:
            below_high, upper = self.high >= X, self.high
        else:
            below_high, upper = self.high > X, self.high - 1

        # log of the 0/1 indicator is 0 inside and -inf outside
        log_indicator = np.log(above_low * below_high)
        return log_indicator - np.log(upper - self.low)
Ejemplo n.º 26
0
    def log_pmf(self, *X):
        """Log Probability Mass Function

        Evaluates, elementwise, the log of

        .. code-block:: text

            C(k + X - 1, X) * bias**k * (1 - bias)**X

        where `k` is :code:`n_success` and `X` is floored to an integer. This
        is the mass function whose cumulative sum is
        :code:`betainc(k, X + 1, bias)`.

        Parameters
        ----------
        X : numpy.ndarray
            Observations; passed through :code:`check_array` and floored.

        Returns
        -------
        numpy.ndarray
            The log mass reported elementwise.
        """
        # check array for numpy structure
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # floor values of X
        X = np.floor(X)

        # alias n_success
        k = self.n_success

        # log of the binomial coefficient C(k + X - 1, X), i.e.
        # gammaln(k + X) - gammaln(X + 1) - gammaln(k); choosing `X` (not `k`)
        # keeps the mass at X = 0 equal to bias**k instead of zero
        log_coeff = sc.gammaln(k + X) - (sc.gammaln(X + 1) + sc.gammaln(k))

        out = (log_coeff +
               X * nanlog(1 - self.bias) + k * nanlog(self.bias))
        return out
Ejemplo n.º 27
0
    def log_pmf(self, *X):
        """Log Probability Mass Function

        Evaluates, elementwise, the log of
        :code:`C(n, X) * bias**X * (1 - bias)**(n - X)` where `n` is
        :code:`n_trials` and `X` is floored to an integer.

        Parameters
        ----------
        X : numpy.ndarray
            Observations; passed through :code:`check_array` and floored.

        Returns
        -------
        numpy.ndarray
            The log mass reported elementwise.
        """
        # validate, then snap every value down to the nearest integer
        X = np.floor(check_array(X, reduce_args=True, ensure_1d=True))

        n, p = self.n_trials, self.bias

        # log of the binomial coefficient via log-gamma
        log_choose = (
            sc.gammaln(n + 1)
            - (sc.gammaln(X + 1) + sc.gammaln(n - X + 1))
        )

        # xlogy / xlog1py keep `0 * log(0)` terms at 0
        return log_choose + sc.xlogy(X, p) + sc.xlog1py(n - X, -p)
Ejemplo n.º 28
0
    def partial_fit(self, X):
        """Incrementally update bias and number of trials from a batch.

        Negative values are replaced with NaN and ignored. Running estimates
        of the mean and of the (population) variance are combined with the
        batch statistics, after which the parameters are derived by moment
        matching: :code:`bias = 1 - variance / mean` and
        :code:`n_trials = round(mean / bias)`.

        Parameters
        ----------
        X : numpy.ndarray
            New observations; converted to a float array before masking.

        Returns
        -------
        self
            The updated instance.
        """
        # check array for numpy structure
        X = check_array(X, reduce_args=True, ensure_1d=True).astype(float)

        # identify values outside of support
        # NOTE: we don't know the "true" upper bounds so we only
        # check that values are positive
        invalid = X < 0
        X[invalid] = np.nan

        # first fit
        if not hasattr(self, "_n_samples"):
            self._n_samples = 0

        if self._mean is None and self._variance is None:
            self._n_samples += X.shape[0] - np.isnan(X).sum()
            self._mean = np.nanmean(X)
            self._variance = np.nanvar(X)
        else:
            # previous values
            prev_size = self._n_samples
            prev_mean = self._mean
            prev_variance = self._variance

            # new values
            curr_size = X.shape[0] - np.isnan(X).sum()
            curr_mean = np.nanmean(X)
            curr_variance = np.nanvar(X)

            # update size
            self._n_samples = prev_size + curr_size

            # update mean
            new_mean = (
                (prev_mean * prev_size) + (curr_mean * curr_size)
            ) / self._n_samples

            # update variance: each group contributes its own variance plus
            # the squared shift of its mean from the combined mean, otherwise
            # the spread between batches would be lost
            self._variance = (
                prev_size * (prev_variance + (prev_mean - new_mean) ** 2)
                + curr_size * (curr_variance + (curr_mean - new_mean) ** 2)
            ) / self._n_samples

            self._mean = new_mean

        self.bias = 1 - (self._variance / self._mean)
        self.n_trials = np.round(self._mean / self.bias).astype(int)
        return self
Ejemplo n.º 29
0
    def quantile(self, *q):
        """Quantile Function

        Inverts :code:`scipy.special.nbdtr` for the stored
        :code:`n_success` and :code:`bias`. The real-valued inverse is rounded
        up, the next lower count is used when its cumulative probability
        already reaches `q`, and results rejected by :code:`self.support` are
        replaced with NaN.

        Parameters
        ----------
        q : numpy.ndarray, float
            Probabilities; passed through :code:`check_array` before use.

        Returns
        -------
        numpy.ndarray
            The counts associated with the input quantiles.
        """
        # coerce input through the shared array validator
        q = check_array(q, reduce_args=True, ensure_1d=True)

        successes, bias = self.n_success, self.bias

        # candidate count from the continuous inverse, rounded up
        upper = np.ceil(sc.nbdtrik(q, successes, bias))

        # neighbouring lower candidate, never below zero
        lower = np.maximum(upper - 1, 0)

        # cumulative probability reached by the lower candidate
        lower_cdf = sc.nbdtr(lower, successes, bias)

        # prefer the lower candidate when it already covers q
        chosen = np.where(lower_cdf >= q, lower, upper).astype(int)

        # discard anything that the support rejects
        in_support = self.support.contains(chosen)
        return np.where(in_support, chosen, np.nan)
Ejemplo n.º 30
0
    def partial_fit(self, X):
        """Incrementally widen the bounds to cover a batch of data.

        :code:`low` is lowered to the NaN-aware minimum of the batch and
        :code:`high` is raised to its NaN-aware maximum whenever the batch
        extends past the stored bound. A bound that is still None is
        initialized from the batch.

        Parameters
        ----------
        X : numpy.ndarray
            New observations; passed through :code:`check_array` before use.

        Returns
        -------
        self
            The updated instance.
        """

        # check array for numpy structure
        X = check_array(X, reduce_args=True, ensure_1d=True)

        # extremes of the incoming batch (NaN entries are ignored)
        curr_low, curr_high = np.nanmin(X), np.nanmax(X)

        # each bound is handled on its own so that an instance with only one
        # bound preset never compares a number against None
        if self.low is None or curr_low < self.low:
            self.low = curr_low

        if self.high is None or curr_high > self.high:
            self.high = curr_high

        return self