Example #1
    def rvs(self, x, n_curves=1, n_samples=1, T=None):
        # Samples conversion outcomes from this distribution.
        # T is optional; if given, it means non-conversion was already
        # observed up to time T.
        assert self._ci  # needs to have been fit with ci=True (MCMC)
        if T is None:
            T = numpy.zeros((n_curves, n_samples))
        else:
            assert T.shape == (n_curves, n_samples)
        B = numpy.zeros((n_curves, n_samples), dtype=bool)
        C = numpy.zeros((n_curves, n_samples))
        params = self.params['samples']
        for i, j in enumerate(numpy.random.randint(len(params['k']),
                                                   size=n_curves)):
            k = params['k'][j]
            p = params['p'][j]
            lambd = exp(dot(x, params['alpha'][j]) + params['a'][j])
            c = expit(dot(x, params['beta'][j]) + params['b'][j])
            z = numpy.random.uniform(size=(n_samples,))
            cdf_now = c * gammainc(
                k,
                numpy.multiply.outer(T[i], lambd)**p)  # outer() broadcasts T[i] over lambd, whether lambd is a scalar or a vector
            adjusted_z = cdf_now + (1 - cdf_now) * z
            B[i] = (adjusted_z < c)
            y = adjusted_z / c
            w = gammaincinv(k, y)
            # Invert w = (t * lambd)**p to recover t = w**(1/p) / lambd
            C[i] = w**(1./p) / lambd
            C[i][~B[i]] = 0

        return B, C
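A minimal usage sketch (hypothetical: `model` stands for a fitted instance of this class, fit with `ci=True` so MCMC samples exist, and the feature vector is made up):

import numpy

x = numpy.array([1.0, 0.0, 1.0])                # hypothetical feature vector
B, C = model.rvs(x, n_curves=100, n_samples=50)
# B: (100, 50) boolean array, True where the simulated sample converted
# C: (100, 50) array of conversion times, 0 where B is False
print(B.mean())                                 # simulated conversion rate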
Example #2
def generalized_gamma_loss(x, X, B, T, W, fix_k, fix_p,
                           hierarchical, flavor, callback=None):
    # The parameters for this distribution are k, p, lambd
    k = exp(x[0]) if fix_k is None else fix_k
    p = exp(x[1]) if fix_p is None else fix_p
    log_sigma_alpha = x[2]
    log_sigma_beta = x[3]
    a = x[4]
    b = x[5]
    n_features = int((len(x)-6)/2)
    alpha = x[6:6+n_features]
    beta = x[6+n_features:6+2*n_features]
    lambd = exp(dot(X, alpha) + a)  # lambd = exp(X.alpha + a); X has shape (N, n_features), alpha has shape (n_features,)

    # PDF: p*lambda^(k*p) / gamma(k) * t^(k*p-1) * exp(-(t*lambda)^p)
    log_pdf = log(p) + (k*p) * log(lambd) - gammaln(k) \
              + (k*p-1) * log(T) - (T*lambd)**p
    cdf = gammainc(k, (T*lambd)**p)

    if flavor == 'logistic':  # Log-likelihood with sigmoid
        c = expit(dot(X, beta)+b) # fit one beta for each group 
        LL_observed = log(c) + log_pdf
        LL_censored = log((1 - c) + c * (1 - cdf))
    elif flavor == 'linear':  # L2 loss, linear
        c = dot(X, beta)+b
        LL_observed = -(1 - c)**2 + log_pdf
        LL_censored = -(c*cdf)**2

    LL_data = sum(
        W * B * LL_observed +
        W * (1 - B) * LL_censored, 0)

    if hierarchical:
        # Hierarchical model with sigmas ~ invgamma(1, 1)
        LL_prior_a = -4*log_sigma_alpha - 1/exp(log_sigma_alpha)**2 \
                     - dot(alpha, alpha) / (2*exp(log_sigma_alpha)**2) \
                     - n_features*log_sigma_alpha
        LL_prior_b = -4*log_sigma_beta - 1/exp(log_sigma_beta)**2 \
                     - dot(beta, beta) / (2*exp(log_sigma_beta)**2) \
                     - n_features*log_sigma_beta
        LL = LL_prior_a + LL_prior_b + LL_data
    else:
        LL = LL_data

    if isnan(LL):
        return -numpy.inf
    if callback is not None:
        callback(LL)
    return LL
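The function returns a log-likelihood, so a plausible way to drive it is to minimize its negation. A self-contained sketch with synthetic data (the data generation and optimizer choice here are assumptions, not how the library necessarily fits the model):

import numpy as np
from scipy.optimize import minimize

rng = np.random.default_rng(0)
N, n_features = 200, 3
X = rng.normal(size=(N, n_features))            # synthetic features
T = rng.exponential(scale=2.0, size=N) + 1e-3   # synthetic observation times
B = (rng.random(N) < 0.4).astype(float)         # 1.0 where conversion observed
W = np.ones(N)                                  # uniform sample weights

x0 = np.zeros(6 + 2 * n_features)  # log k, log p, two log-sigmas, a, b, alpha, beta
res = minimize(
    lambda x: -generalized_gamma_loss(
        x, X, B, T, W, fix_k=None, fix_p=None,
        hierarchical=True, flavor='logistic'),
    x0, method='Nelder-Mead')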
Example #3
    def cdf_posteriori(self, x, t, ci=None):
        '''Returns the value of the cumulative distribution function
        for a fitted model.

        :param x: feature vector (or matrix)
        :param t: time
        :param ci: if this is provided, and the model was fit with
            `ci = True`, then the return value will be the trace
            samples generated via the MCMC steps. If this is not
            provided, then the max a posteriori prediction will be used.
        '''
        x = numpy.array(x)
        t = numpy.array(t)
        if ci is None:
            params = self.params["map"]
        else:
            assert self._ci
            params = self.params["samples"]
            t = numpy.expand_dims(t, -1)
        lambd = exp(dot(x, params["alpha"].T) + params["a"])
        if self._flavor == "logistic":
            c = expit(dot(x, params["beta"].T) + params["b"])
        elif self._flavor == "linear":
            c = dot(x, params["beta"].T) + params["b"]
        M = c * gammainc(params["k"], (t * lambd) ** params["p"])

        return M
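Reading `M` off the code, the quantity returned is (notation as in the snippet):

F(t | x) = c(x) * P(k, (t * λ(x))^p),   λ(x) = exp(dot(x, α) + a),   c(x) = expit(dot(x, β) + b)

where P is the regularized lower incomplete gamma function (`gammainc`): c(x) is the probability of ever converting, and the gamma term is the generalized gamma CDF of the conversion time.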
Example #4
    def _survival_function(self, params, times):
        mu_, ln_sigma_, lambda_ = params
        sigma_ = safe_exp(ln_sigma_)
        Z = (log(times) - mu_) / sigma_
        if lambda_ > 0:
            return gammaincc(1 / lambda_ ** 2, safe_exp(lambda_ * Z - 2 * np.log(lambda_)))
        else:
            return gammainc(1 / lambda_ ** 2, safe_exp(lambda_ * Z - 2 * np.log(-lambda_)))
Example #5
    def _survival_function(self, params, times):
        mu_, ln_sigma_, lambda_ = params
        sigma_ = exp(ln_sigma_)
        Z = (log(times) - mu_) / sigma_
        if lambda_ > 0:
            return gammaincc(1 / lambda_**2, exp(lambda_ * Z) / lambda_**2)
        else:
            return gammainc(1 / lambda_**2, exp(lambda_ * Z) / lambda_**2)
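Examples #4 and #5 implement the same survival function and differ only in how they keep the exponential finite; written out from the code:

S(t) = Q(1/λ², exp(λZ)/λ²)   if λ > 0
S(t) = P(1/λ², exp(λZ)/λ²)   if λ < 0
Z = (log t − μ)/σ

where P and Q are the regularized lower (`gammainc`) and upper (`gammaincc`) incomplete gamma functions. The exp(λZ − 2·log λ) form in Example #4 is just an overflow-resistant rewriting of exp(λZ)/λ².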
Example #6
    def _predict(self, params, x, t):
        lambd = exp(dot(x, params['alpha'].T) + params['a'])
        if self._flavor == 'logistic':
            c = expit(dot(x, params['beta'].T) + params['b'])
        elif self._flavor == 'linear':
            c = dot(x, params['beta'].T) + params['b']
        M = c * gammainc(params['k'], (t * lambd)**params['p'])

        return M
Example #7
    def _survival_function(self, params, T, Xs):
        lambda_ = np.clip(Xs["lambda_"] @ params["lambda_"], 1e-25, 1e10)
        sigma_ = safe_exp(Xs["sigma_"] @ params["sigma_"])
        mu_ = Xs["mu_"] @ params["mu_"]

        Z = (log(T) - mu_) / sigma_
        ilambda_2 = 1 / lambda_ ** 2
        exp_term = safe_exp(lambda_ * Z - 2 * log(np.abs(lambda_)))

        return np.where(lambda_ > 0, gammaincc(ilambda_2, exp_term), gammainc(ilambda_2, exp_term))
Example #8
    def _survival_function(self, params, T, Xs):
        lambda_ = Xs["lambda_"] @ params["lambda_"]
        sigma_ = safe_exp(Xs["sigma_"] @ params["sigma_"])
        mu_ = Xs["mu_"] @ params["mu_"]

        Z = (log(T) - mu_) / sigma_
        ilambda_2 = 1 / lambda_**2
        exp_term = np.clip(safe_exp(lambda_ * Z) * ilambda_2, 1e-300, 1e25)

        return np.where(lambda_ > 0, gammaincc(ilambda_2, exp_term),
                        gammainc(ilambda_2, exp_term))
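One detail worth isolating: `np.where` evaluates both branches eagerly, so the exponential term is computed even for rows where the other branch is selected; the clipping presumably keeps that value (and its gradient during fitting) well-behaved. A small self-contained sketch of the same pattern, with made-up values:

import numpy as np
from scipy.special import gammainc, gammaincc

lambda_ = np.array([2.0, -2.0])
Z = np.array([400.0, 400.0])

with np.errstate(over='ignore'):
    raw = np.exp(lambda_ * Z) / lambda_**2  # overflows to inf where lambda_ > 0
exp_term = np.clip(raw, 1e-300, 1e25)       # same guard as in the snippet above

a = 1 / lambda_**2
S = np.where(lambda_ > 0, gammaincc(a, exp_term), gammainc(a, exp_term))
print(S)  # finite survival values for both rows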
Example #9
def generalized_gamma_loss(x,
                           X,
                           B,
                           T,
                           W,
                           fix_k,
                           fix_p,
                           hierarchical,
                           flavor,
                           callback=None):
    k = exp(x[0]) if fix_k is None else fix_k
    p = exp(x[1]) if fix_p is None else fix_p
    log_sigma_alpha = x[2]
    log_sigma_beta = x[3]
    a = x[4]
    b = x[5]
    n_features = int((len(x) - 6) / 2)
    alpha = x[6:6 + n_features]
    beta = x[6 + n_features:6 + 2 * n_features]
    lambd = exp(dot(X, alpha) + a)

    # PDF: p*lambda^(k*p) / gamma(k) * t^(k*p-1) * exp(-(t*lambda)^p)
    log_pdf = log(p) + (k*p) * log(lambd) - gammaln(k) \
              + (k*p-1) * log(T) - (T*lambd)**p
    cdf = gammainc(k, (T * lambd)**p)

    if flavor == 'logistic':  # Log-likelihood with sigmoid
        c = expit(dot(X, beta) + b)
        LL_observed = log(c) + log_pdf
        LL_censored = log((1 - c) + c * (1 - cdf))
    elif flavor == 'linear':  # L2 loss, linear
        c = dot(X, beta) + b
        LL_observed = -(1 - c)**2 + log_pdf
        LL_censored = -(c * cdf)**2

    LL_data = sum(W * B * LL_observed + W * (1 - B) * LL_censored, 0)

    if hierarchical:
        # Hierarchical model with sigmas ~ invgamma(1, 1)
        LL_prior_a = -4*log_sigma_alpha - 1/exp(log_sigma_alpha)**2 \
                     - dot(alpha, alpha) / (2*exp(log_sigma_alpha)**2) \
                     - n_features*log_sigma_alpha
        LL_prior_b = -4*log_sigma_beta - 1/exp(log_sigma_beta)**2 \
                     - dot(beta, beta) / (2*exp(log_sigma_beta)**2) \
                     - n_features*log_sigma_beta
        LL = LL_prior_a + LL_prior_b + LL_data
    else:
        LL = LL_data

    if isnan(LL):
        return -numpy.inf
    if callback is not None:
        callback(LL)
    return LL
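The hierarchical terms can be read off directly: with σ² ~ InvGamma(1, 1) and α_i | σ ~ N(0, σ²) for each of the n_features coefficients, the log-density (constants and the change-of-variables Jacobian omitted, as in the code) is

log p = −2·log σ² − 1/σ² − dot(α, α)/(2σ²) − n_features·log σ
      = −4·log σ − 1/σ² − dot(α, α)/(2σ²) − n_features·log σ

which is exactly `LL_prior_a` with σ = exp(log_sigma_alpha), and symmetrically `LL_prior_b` for β.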
Example #10
def test_gammainc():

    for a in np.logspace(-0.1, 2, 10):
        gammainc_1 = lambda x: gammainc(a, x)
        gammainc_2 = lambda x: grad(gammainc, argnum=1)(a, x)

        for x0 in np.logspace(-4, 2, 7):
            assert check_grad(gammainc_1, gammainc_2, x0) < 0.0001
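What these asserts exercise is the closed-form derivative of the regularized lower incomplete gamma function with respect to its second argument, d/dx P(a, x) = x^(a−1)·exp(−x)/Γ(a). A scipy-only sanity check of that identity, independent of the `grad`/`check_grad` helpers used above:

import numpy as np
from scipy.special import gammainc, gammaln

def dgammainc_dx(a, x):
    # d/dx P(a, x) = x**(a - 1) * exp(-x) / Gamma(a), computed in log space
    return np.exp((a - 1) * np.log(x) - x - gammaln(a))

a, x, eps = 1.5, 0.7, 1e-6
finite_diff = (gammainc(a, x + eps) - gammainc(a, x - eps)) / (2 * eps)
assert np.isclose(dgammainc_dx(a, x), finite_diff)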
Example #11
    def _survival_function(self, params, times):
        mu_, ln_sigma_, lambda_ = params
        sigma_ = safe_exp(ln_sigma_)
        Z = (log(times) - mu_) / sigma_
        clipped_exp = np.clip(safe_exp(lambda_ * Z) / lambda_**2, 1e-15, 1e20)
        ilambda_2 = 1 / lambda_**2

        if lambda_ > 0:
            v = gammaincc(ilambda_2, clipped_exp)
        elif lambda_ < 0:
            v = gammainc(ilambda_2, clipped_exp)
        else:
            v = norm.sf(Z)

        # never return a 0
        return np.clip(v, 1e-50, 1 - 1e-50)
Example #12
    def cdf(self, x, t, ci=None):
        '''Returns the value of the cumulative distribution function
        for a fitted model. TODO: this should probably be renamed
        "predict" in the future to follow the scikit-learn convention.

        :param x: feature vector (or matrix)
        :param t: time
        :param ci: if this is provided, and the model was fit with
            `ci = True`, then the return value will contain one more
            dimension, and the last dimension will have size 3,
            containing the mean, the lower bound of the confidence
            interval, and the upper bound of the confidence interval.
            If this is not provided, then the max a posteriori
            prediction will be used.
        '''
        x = numpy.array(x)
        t = numpy.array(t)
        if ci is None:
            params = self.params['map']
        else:
            assert self._ci
            params = self.params['samples']
            t = numpy.expand_dims(t, -1)
        lambd = exp(dot(x, params['alpha'].T) + params['a'])
        if self._flavor == 'logistic':
            c = expit(dot(x, params['beta'].T) + params['b'])
        elif self._flavor == 'linear':
            c = dot(x, params['beta'].T) + params['b']
        M = c * gammainc(
            params['k'],
            (t*lambd)**params['p'])

        if not ci:
            return M
        else:
            # Replace the last axis with a 3-element vector
            y = numpy.mean(M, axis=-1)
            y_lo = numpy.percentile(M, (1-ci)*50, axis=-1)
            y_hi = numpy.percentile(M, (1+ci)*50, axis=-1)
            return numpy.stack((y, y_lo, y_hi), axis=-1)
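A hypothetical call pattern (assuming `model` is a fitted instance, fit with `ci=True`; the feature vector and time grid are made up):

import numpy

x = numpy.array([1.0, 0.0, 1.0])
t = numpy.linspace(0, 30, 100)

y_map = model.cdf(x, t)           # max a posteriori curve, shape (100,)
y_ci = model.cdf(x, t, ci=0.95)   # shape (100, 3): mean, lower, upper bound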
Example #13
def test_gammainc_fails():
    # For a < 1 the derivative x**(a - 1) * exp(-x) / Gamma(a) diverges as
    # x -> 0, so a gradient check at x = 1e-4 with a = 0.1 is expected to fail.
    a = 0.1
    gammainc_1 = lambda x: gammainc(a, x)
    gammainc_2 = lambda x: grad(gammainc, argnum=1)(a, x)
    assert not check_grad(gammainc_1, gammainc_2, 1e-4) < 0.0001