Example #1
def get_logdelta_ana_gaussian(sigma, eps):
    """ This function calculates the delta parameter for analytical gaussian mechanism given eps"""
    assert (eps >= 0)
    s, mag = utils.stable_log_diff_exp(
        norm.logcdf(0.5 / sigma - eps * sigma),
        eps + norm.logcdf(-0.5 / sigma - eps * sigma))
    return mag
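For orientation: the two log-CDF terms above are the log-domain pieces of the analytical Gaussian mechanism's delta, delta = Phi(0.5/sigma - eps*sigma) - exp(eps) * Phi(-0.5/sigma - eps*sigma). A minimal direct (non log-domain) sketch of the same quantity, useful as a cross-check whenever delta is not vanishingly small; the helper name is illustrative:

import numpy as np
from scipy.stats import norm

def delta_ana_gaussian(sigma, eps):
    # Direct evaluation of delta; it underflows when delta is extremely small,
    # which is exactly why the snippet above works in the log domain instead.
    return norm.cdf(0.5 / sigma - eps * sigma) - np.exp(eps) * norm.cdf(-0.5 / sigma - eps * sigma)

# e.g. delta_ana_gaussian(1.0, 1.0) is roughly 0.127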
Example #2
    def loglikeobs(self, params_all):
        """
        Log-likelihood of model.
        Parameters
        ----------
        params_all : array-like
            Parameter estimates, with the parameters for the regression 
            equation coming first, then the parameters for the
            selection equation, then log sigma, then atanh rho.
        Returns
        -------
        loglike : float
            The value of the log-likelihood function for a Heckman correction model.
        """

        # set up data and parameters needed to compute log likelihood
        Y, X, Z = self.get_datamats()
        D = self.treated

        num_xvars = X.shape[1]
        num_zvars = Z.shape[1]

        xbeta = np.asarray(params_all[:num_xvars])  # reg eqn coefs
        zbeta = np.asarray(params_all[num_xvars:num_xvars +
                                      num_zvars])  # selection eqn coefs
        log_sigma = params_all[-2]
        atanh_rho = params_all[-1]
        sigma = np.exp(log_sigma)
        rho = np.tanh(atanh_rho)

        # line the data vectors up
        Z_zbeta_aligned = Z.dot(zbeta)

        X_xbeta = X.dot(xbeta)
        X_xbeta_aligned = np.empty(self.nobs_total)
        X_xbeta_aligned[:] = np.nan
        X_xbeta_aligned[D] = X_xbeta
        del X_xbeta

        Y_aligned = np.empty(self.nobs_total)
        Y_aligned[:] = np.nan
        Y_aligned[D] = Y

        # create an array where each row is the log likelihood for the corresponding observation
        norm_cdf_input = (
            Z_zbeta_aligned +
            (Y_aligned - X_xbeta_aligned) * rho / sigma) / np.sqrt(1 - rho**2)
        norm_cdf_input[~D] = 0  # dummy value

        ll_obs_observed = np.multiply(
            D,
            norm.logcdf(norm_cdf_input)
            - (1. / 2.) * ((Y_aligned - X_xbeta_aligned) / sigma)**2
            - np.log(np.sqrt(2 * np.pi) * sigma))
        ll_obs_observed[~D] = 0
        ll_obs_notobserved = np.multiply(1 - D, norm.logcdf(-Z_zbeta_aligned))

        ll_obs = ll_obs_observed + ll_obs_notobserved

        # return log likelihood by observation vector
        return ll_obs
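A small sketch (values made up) of how the flat parameter vector described in the docstring would be packed before calling loglikeobs:

import numpy as np

beta = np.array([1.0, -0.5, 0.2])   # regression-equation coefficients (num_xvars = 3)
gamma = np.array([0.3, 0.1])        # selection-equation coefficients (num_zvars = 2)
sigma, rho = 2.0, 0.4
params_all = np.concatenate([beta, gamma, [np.log(sigma)], [np.arctanh(rho)]])
# model.loglikeobs(params_all) then returns one log-likelihood term per observation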
Example #3
def integrateNormalDensity(lb, ub, mu=0, sigma=1):
    import math
    import numpy as np
    from scipy.stats import norm
    assert not (ub < lb)
    lessThanUpper = norm.logcdf(ub, loc=mu, scale=sigma)
    lessThanLower = norm.logcdf(lb, loc=mu, scale=sigma)
    #print lessThanUpper,lessThanLower,lessThanUpper-lessThanLower,1 - math.exp(lessThanLower - lessThanUpper)
    return lessThanUpper + np.log1p(-math.exp(lessThanLower - lessThanUpper))
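The log1p/exp combination above is a numerically stable way of computing log(Phi(ub) - Phi(lb)); a quick sanity check of that identity:

import math
import numpy as np
from scipy.stats import norm

lb, ub = -1.0, 1.0
direct = math.log(norm.cdf(ub) - norm.cdf(lb))   # fine here, but loses precision far out in the tails
stable = norm.logcdf(ub) + np.log1p(-math.exp(norm.logcdf(lb) - norm.logcdf(ub)))
assert abs(direct - stable) < 1e-12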
Example #4
def constraint_indicator_func(x, indicator_how):
    if indicator_how == "Fermi_Dirac":
        return np.log(expit(x))
    elif indicator_how == "normal":
        return norm.logcdf(x)
    else:
        return norm.logcdf(x)
Example #5
    def _infer_probabilities(self, features, stdev):
        """ calculate probabilities of selected primitives out of polynom features and their standard deviation
        assuming that the features are independent and normal distributed with mean = coefficient it self and stdev.

        Args:
            features (ndarray): polynom coefficients
            stdev (ndarray): standard deviation of the polynom coefficients

        """
        # get the max of each feature over time and take a percentage of it; pay attention to the second derivative.
        deltas = np.nanmax(np.absolute(features), axis=0) * self.delta + np.nanmin(np.absolute(features), axis=0)  # np.nanmax(np.absolute(b), axis=0) * [0.2, self.delta, self.delta]

        # Convert regression coefficients to probabilities for each qualitative state; features are normalised so that norm.logcdf can be used
        prob_p = norm.logcdf((features - deltas) / stdev)
        prob_n = norm.logcdf((-features - deltas) / stdev)
        prob_pn = norm.logcdf((deltas - features) / stdev)
        prob_0 = prob_pn + np.log1p(-np.exp(prob_n-prob_pn))

        params = {'pos': prob_p, 'neg': prob_n, 'zero': prob_0, 'ignore': np.zeros(features.shape)}

        py = np.zeros((features.shape[0], self.prim_nr))

        # collect the right probability for each primitive and add them together (assuming independent probabilities)
        for i, prim in enumerate(self.primitives):  # iterate over primitives
            # iterate over the signal and its 1st and 2nd derivatives, which is equivalent to iterating over the polynomial's parameters.
            prob = [params[sign][:, j] for j, sign in enumerate(self.all_primitives[prim]) if j < self.coeff_nr]
            py[:, i] = np.sum(prob, axis=0)

        return py
Example #6
def calculate_log_joint_bernoulli_likelihood(latent_prob_samples: np.ndarray,
                                             outcomes: np.ndarray,
                                             link: str = "probit") -> float:
    # latent_prob_samples is n_samples x n_outcomes array of probabilities on
    # the probit scale
    # outcomes is (n_outcomes,) array of binary outcomes (1 and 0)
    assert latent_prob_samples.shape[1] == outcomes.shape[0]

    # Make sure broadcasting is unambiguous
    assert latent_prob_samples.shape[0] != outcomes.shape[0]

    n_samples = latent_prob_samples.shape[0]

    # Get log likelihood for each draw

    assert link in ["logit",
                    "probit"], "Only logit and probit links supported!"

    if link == "probit":
        individual_liks = np.sum(
            outcomes * norm.logcdf(latent_prob_samples) +
            (1 - outcomes) * norm.logcdf(-latent_prob_samples),
            axis=1,
        )
    else:
        individual_liks = np.sum(
            outcomes * np.log(expit(latent_prob_samples)) +
            (1 - outcomes) * np.log(1 - expit(latent_prob_samples)),
            axis=1,
        )

    # Compute the Monte Carlo expectation
    return logsumexp(individual_liks - np.log(n_samples))
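A usage sketch with assumed shapes (3 Monte Carlo draws of the latent values for 2 binary outcomes), assuming the imports the snippet relies on (norm, expit, logsumexp) are in scope; the return value is the log of the Monte Carlo average likelihood:

import numpy as np

rng = np.random.default_rng(0)
latent = rng.normal(size=(3, 2))   # n_samples x n_outcomes, on the latent (probit) scale
y = np.array([1, 0])
print(calculate_log_joint_bernoulli_likelihood(latent, y, link="probit"))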
Example #7
def gaussianize_1d(X, pi, mu, sigma_sqr):
  mask_bound = 5e-8

  N, D = X.shape

  # for calculations please see: https://www.overleaf.com/6125358376rgmjjgdsmdmm
  scaled = (X.unsqueeze(-1) - mu) / sigma_sqr**0.5
  scaled = scaled.cpu()
  normal_cdf = to_tensor(norm.cdf(scaled))
  cdf = (pi * normal_cdf).sum(-1)
  log_cdfs = to_tensor(norm.logcdf(scaled))
  log_cdf = torch.logsumexp(torch.log(pi) + log_cdfs, dim=-1)
  log_sfs = to_tensor(norm.logcdf(-1*scaled))
  log_sf = torch.logsumexp(torch.log(pi) + log_sfs, dim=-1)

  # Approximate Gaussian CDF
  # inv(CDF) ~ np.sqrt(-2 * np.log(1-x)) #right, x -> 1
  # inv(CDF) ~ -np.sqrt(-2 * np.log(x)) #left, x -> 0
  # 1) Step1: invert good CDF
  cdf_mask = ((cdf > mask_bound) & (cdf < 1 - (mask_bound))).double()
  # Keep good CDF values; mask the bad ones to 0.5 (since inverse(0.5) = 0)
  cdf_good = cdf * cdf_mask + 0.5 * (1. - cdf_mask)
  inverse_cdf = normal_distribution.icdf(cdf_good)

  # 2) Step2: invert BAD large CDF
  cdf_mask_right = (cdf >= 1. - (mask_bound)).double()
  # Keep large bad CDF, mask the good and small bad CDF values to 0.
  cdf_bad_right_log = log_sf * cdf_mask_right
  inverse_cdf += torch.sqrt(-2. * cdf_bad_right_log)

  # 3) Step3: invert BAD small CDF
  cdf_mask_left = (cdf <= mask_bound).double()
  # Keep small bad CDF, mask the good and large bad CDF values to 1.
  cdf_bad_left_log = log_cdf * cdf_mask_left
  inverse_cdf += (-torch.sqrt(-2 * cdf_bad_left_log))
  if torch.isnan(inverse_cdf.max()) or torch.isnan(inverse_cdf.min()):
    print('inverse CDF: NaN.')
    pdb.set_trace()
  if torch.isinf(inverse_cdf.max()) or torch.isinf(inverse_cdf.min()):
    print('inverse CDF: Inf.')
    exit(0)
    pdb.set_trace()

  # old simple (and possibly numerically unstable) way
  cdf2 = norm.cdf(scaled)
  # remove outliers 
  cdf2[cdf2<EPS] = EPS
  cdf2[cdf2>1-EPS] = 1 - EPS
  new_distr = (pi.cpu().numpy() * cdf2).sum(-1)
  new_X = norm.ppf(new_distr)
  new_X = to_tensor(new_X)

  if False and torch.norm(new_X - inverse_cdf) > 10:
    print('Gaussianization 1D mismatch.')
    pdb.set_trace()

  # return inverse_cdf, cdf_mask, [log_cdf, cdf_mask_left], [log_sf, cdf_mask_right]
  return new_X, cdf_mask, [log_cdf, cdf_mask_left], [log_sf, cdf_mask_right]
Example #8
    def log_likelihood(self, smis, *, log_0=-1000.0, **targets):
        def _avoid_overflow(ll_):
            # log(exp(log(UP) - log(C)) - exp(log(LOW) - log(C))) + log(C)
            # where C = max(log(UP), log(LOW))
            ll_c = np.max(ll_)
            ll_ = np.log(np.exp(ll_[1] - ll_c) - np.exp(ll_[0] - ll_c)) + ll_c
            return ll_

        # self.update_targets(reset=False, **targets):
        for k, v in targets.items():
            if not isinstance(v, tuple) or len(v) != 2 or v[1] <= v[0]:
                raise ValueError('must be a tuple with (low, up) boundary')
            self.targets[k] = v

        if not self.targets:
            raise RuntimeError('<targets> is empty')

        if isinstance(smis, (pd.Series, pd.DataFrame)):
            ll = pd.DataFrame(np.full((len(smis), len(self._mdl)), log_0),
                              index=smis.index,
                              columns=self._mdl.keys())
        else:
            ll = pd.DataFrame(np.full((len(smis), len(self._mdl)), log_0),
                              columns=self._mdl.keys())

        # 1. apply prediction on given sims
        # 2. reset returns' index to [0, 1, ..., len(smis) - 1], this should be consistent with ll's index
        # 3. drop all rows which have NaN value(s)
        pred = self.predict(smis).reset_index(drop=True).dropna(axis='index',
                                                                how='any')

        # because pred only contains available data
        # 'pred.index.values' should be equal to the previous implementation
        idx = pred.index.values

        # calculate likelihood
        for k, (low, up) in self.targets.items():  # k: target; v: (low, up)

            # predict mean, std for all smiles
            mean, std = pred[k + ': mean'], pred[k + ': std']

            # calculate low likelihood
            low_ll = norm.logcdf(low,
                                 loc=np.asarray(mean),
                                 scale=np.asarray(std))

            # calculate up likelihood
            up_ll = norm.logcdf(up,
                                loc=np.asarray(mean),
                                scale=np.asarray(std))

            # zip low and up likelihood to a 1-dim array then save it.
            # like: [(tar_low_smi1, tar_up_smi1),  (tar_low_smi2, tar_up_smi2), ..., (tar_low_smiN, tar_up_smiN)]
            lls = zip(low_ll, up_ll)
            ll[k].iloc[idx] = np.array([*map(_avoid_overflow, list(lls))])

        return ll
Example #9
def plot_decomposed_manhattan2(self,
                               tissues=None,
                               width=None,
                               components=None,
                               save_path=None):
    if tissues is None:
        tissues = np.arange(self.dims['T'])
    else:
        tissues = np.arange(self.dims['T'])[np.isin(self.tissue_ids, tissues)]

    if components is None:
        components = np.arange(self.dims['K'])[np.any((self.active > 0.5), 0)]

    if width is None:
        width = int(np.sqrt(tissues.size)) + 1
        height = width
    else:
        height = int(tissues.size / width) + 1

    pred = ((self.active * self.weight_means) @ (self.X @ self.pi.T).T)
    logp = -norm.logcdf(-np.abs(pred)) - np.log(2)
    pos = np.array([int(x.split('_')[1]) for x in self.snp_ids])

    W = self.active * self.weight_means
    c = (self.X @ self.pi.T)

    pred = self._compute_prediction()
    fig, ax = plt.subplots(height,
                           width,
                           figsize=(width * 4, height * 3),
                           sharey=False)

    ax = np.array(ax).flatten()
    for i, t in enumerate(tissues):
        ax[i].set_title('{}\nby component'.format(self.tissue_ids[t]))

        for k in components:
            predk = self._compute_prediction() - self._compute_prediction(k=k)
            logpk = -norm.logcdf(-np.abs(predk)) - np.log(2)
            if i == 0:
                ax[i].scatter(pos,
                              logpk,
                              marker='o',
                              alpha=0.5,
                              label='k{}'.format(k))
            else:
                ax[i].scatter(pos, logpk, marker='o', alpha=0.5)
        ax[i].set_xlabel('SNP position')
        fig.legend()

    plt.tight_layout()
    if save_path is not None:
        plt.savefig(save_path)
    # plt.show()
    plt.close()
Example #10
def discretized_normal_log(yvals, mean, sd):
    s = yvals[1] - yvals[0]  #distance between points in ygrid
    bot_dist = norm.logcdf(
        x=yvals - s / 2.0, loc=mean, scale=sd
    )  #LOG cdf at midpoint between point in grid and previous point
    top_dist = norm.logcdf(
        x=yvals + s / 2.0, loc=mean,
        scale=sd)  #LOG cdf at midpoint between point in grid and next point
    diff = top_dist + np.log(
        -np.expm1(bot_dist - top_dist))  #log1p difference formula
    diff[0] = top_dist[0]  #first value should integrate from -inf
    diff[-1] = norm.logsf(x=yvals[-1] - s / 2.0, loc=mean,
                          scale=sd)  #last value should integrate to +inf
    return diff
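Because the first and last bins are extended to -inf and +inf, the exponentiated values should sum to one; a quick check under that reading:

import numpy as np
from scipy.stats import norm

ygrid = np.linspace(-5.0, 5.0, 201)
logp = discretized_normal_log(ygrid, mean=0.0, sd=1.0)
print(np.exp(logp).sum())   # very close to 1.0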
Example #11
    def ppl_acq_pi(self, pmout_samp, normal=True):
        """
        PPL-PI: PPL acquisition function algorithm for probability of
        improvement (PI).

        Parameters
        ----------
        pmout_samp : ndarray
            A numpy ndarray with shape=(nsamp,).
        normal : bool
            If true, assume pmout_samp are Gaussian distributed.

        Returns
        -------
        float
            PPL-PI acquisition function value.
        """
        youts = np.array(pmout_samp).flatten()
        nsamp = youts.shape[0]
        y_min = self.data.y.min()
        if normal:
            mu = np.mean(youts)
            sig = np.std(youts)
            if sig < 1e-6:
                sig = 1e-6
            piVal = -1 * norm.logcdf(y_min, loc=mu, scale=sig)
        else:
            piVal = -1 * len(np.argwhere(youts < y_min)) / float(nsamp)
        return piVal
Example #12
def upper_bound_logpartition_41(tau, inv_alpha_1):
    tau_1, tau_2 = tau[:D+N], tau[D+N:]
    tau_1_N, tau_2_N = tau_1[D:], tau_2[D:]     # first D values correspond to w
    #assert len(tau_1_N) == len(tau_2_N) == 0
    alpha_1 = 1.0 / inv_alpha_1
    inv_alpha_2 = 1 - inv_alpha_1
    #if np.any(tau_1 <= 0):
    #    print 'one of the tau_1 <= 0: setting integral_1 to INF'
    if np.any(tau_1 <= 0.01):
        #print 'one of the tau_1 <= 0.01/inv_alpha_1: setting integral_1 to INF'
        integral_1 = INF2
    else:
        integral_1 = inv_alpha_1 * (-0.5 * ((D+N)*np.log(alpha_1) + np.sum(np.log(tau_1))) \
                        + np.sum(norm.logcdf(np.sqrt(alpha_1)*tau_2_N/np.sqrt(tau_1_N)))) \
                        + 0.5 * np.sum(np.power(tau_2, 2) / tau_1) \
                        + inv_alpha_1 * (N+D) * 0.5 * np.log(2 * np.pi)
    mat = A - np.diag(tau_1)
    sign, logdet = np.linalg.slogdet(mat)
    if (sign < 0) or np.isinf(logdet):
        #print 'sign = %s, logdet = %s, setting integral_2 to INF' % (sign, logdet)
        integral_2 = INF2
    else:
        try:
            integral_2 = inv_alpha_2 * (-0.5) * (-(D+N)*np.log(inv_alpha_2) + logdet) + 0.5 * np.sum(tau_2 * np.linalg.solve(mat, tau_2))
        except np.linalg.linalg.LinAlgError:
            integral_2 = INF2
        integral_2 += inv_alpha_2 * (N+D) * 0.5 * np.log(2 * np.pi)
    integral = integral_1 + integral_2
#    print 'integral 41: integral_1 = %.3f, integral_2 = %.3f, integral = %.3f' % (integral_1, integral_2, integral)
    return integral
Example #13
def likelihood_function(
    theta, y, yerr
):  #theta: the input parameters; x: frequencies; y: our fluxes from the residual profile. All in log values

    temperature, density, beta = theta  #input parameters for the bb function, used to build the MBB at every step of the chain
    model = surface_brightness(
        frequencies, temperature, 10**density, beta
    )  #model in this instance = bb with the varying free parameters(frequencies, temperature, density, beta) given by mcmc
    model = synthetic_photometry(
        model, filter_array
    )  #Likelihood model: synthetic photometry is the model we fit our data to.

    #We need two likelihood functions: one for detection data points (val >= 3 sigma) and one for non-detection data points (val < 3 sigma).
    #1. Gaussian likelihood - suitable for data with a well-defined value and +/- uncertainty, i.e. a detection of 3 sigma or higher (value >= 3 * unc). Not suitable for values that are only upper limits.
    detection = np.where(y >= 3 * yerr)
    likelihood = (-0.5) * np.sum(((y[detection] - model[detection])**2 /
                                  (yerr[detection]**2)) +
                                 (np.log(2 * np.pi * (yerr[detection]**2))))
    #2. Cumulative distribution function - for values where we only have an upper limit (in our data, mostly 450 SCUBA2). Used for data points where value < 3 * unc; the 3-sigma upper limit effectively becomes the data point.
    non_detection = np.where(y < 3 * yerr)
    likelihood = likelihood + np.sum(
        norm.logcdf(3 * yerr[non_detection], model[non_detection],
                    yerr[non_detection]))

    return likelihood
Example #14
def plot_manhattan(self, component, thresh=0.0, save_path=None):
    """
    Make a Manhattan plot for tissues, colored by the lead SNP of a component.
    Include tissues with p(component active in tissue) > thresh.
    """
    logp = -norm.logcdf(-np.abs(self.Y)) - np.log(2)
    pos = np.array([int(x.split('_')[1]) for x in self.snp_ids])
    #sorted_tissues = np.flip(np.argsort(self.active[:, component]))
    #active_tissues = sorted_tissues[self.active[sorted_tissues, component] > thresh]
    active_tissues = np.arange(
        self.dims['T'])[self.active[:, component] > thresh]
    fig, ax = plt.subplots(1,
                           active_tissues.size,
                           figsize=(5 * active_tissues.size, 4),
                           sharey=True)
    for i, tissue in enumerate(active_tissues):
        lead_snp = self.pi.T[:, component].argmax()
        r2 = self.X[lead_snp]**2
        ax[i].scatter(pos, logp[tissue], c=r2, cmap='RdBu_r')
        ax[i].set_title(
            'Tissue: {}\nLead SNP {}\nweight= {:.2f}, p={:.2f}'.format(
                self.tissue_ids[tissue], lead_snp,
                self.weight_means[tissue, component], self.active[tissue,
                                                                  component]))
        ax[i].set_xlabel('SNP')

    ax[0].set_ylabel('-log(p)')

    if save_path is not None:
        plt.savefig(save_path)
    # plt.show()
    plt.close()
Example #15
File: mes.py Project: kenuku/abo_py
    def _setup(self):
        super(MinValueEntropySearch, self)._setup()

        # Apply Gumbel sampling
        m = self.models[0]
        valid = self.feasible_data_index()

        # Work with feasible data
        X = self.data[0][valid, :]
        N = np.shape(X)[0]
        Xrand = RandomDesign(self.gridsize, self._domain).generate()
        fmean, fvar = m.predict_f(np.vstack((X, Xrand)))
        idx = np.argmin(fmean[:N])
        right = fmean[idx].flatten()# + 2*np.sqrt(fvar[idx]).flatten()
        left = right
        probf = lambda x: np.exp(np.sum(norm.logcdf(-(x - fmean) / np.sqrt(fvar)), axis=0))

        i = 0
        while probf(left) < 0.75:
            left = 2. ** i * np.min(fmean - 5. * np.sqrt(fvar)) + (1. - 2. ** i) * right
            i += 1

        # Binary search for 3 percentiles
        q1, med, q2 = map(lambda val: bisect(lambda x: probf(x) - val, left, right, maxiter=10000, xtol=0.01),
                          [0.25, 0.5, 0.75])
        beta = (q1 - q2) / (np.log(np.log(4. / 3.)) - np.log(np.log(4.)))
        alpha = med + beta * np.log(np.log(2.))

        # obtain samples from y*
        mins = -np.log(-np.log(np.random.rand(self.num_samples).astype(np_float_type))) * beta + alpha
        self.samples.set_data(mins)
Example #16
File: LP.py Project: yxchng/GPyOpt
 def _hammer_function(self, x, x0, r_x0, s_x0):
     '''
     Creates the function to define the exclusion zones
     '''
     return norm.logcdf((np.sqrt((np.square(
         np.atleast_2d(x)[:, None, :] -
         np.atleast_2d(x0)[None, :, :])).sum(-1)) - r_x0) / s_x0)
Example #17
def upper_bound_logpartition_41(tau, inv_alpha_1):
    tau_1, tau_2 = tau[:D + N], tau[D + N:]
    tau_1_N, tau_2_N = tau_1[D:], tau_2[D:]  # first D values correspond to w
    #assert len(tau_1_N) == len(tau_2_N) == 0
    alpha_1 = 1.0 / inv_alpha_1
    inv_alpha_2 = 1 - inv_alpha_1
    #if np.any(tau_1 <= 0):
    #    print 'one of the tau_1 <= 0: setting integral_1 to INF'
    if np.any(tau_1 <= 0.01):
        #print 'one of the tau_1 <= 0.01/inv_alpha_1: setting integral_1 to INF'
        integral_1 = INF2
    else:
        integral_1 = inv_alpha_1 * (-0.5 * ((D+N)*np.log(alpha_1) + np.sum(np.log(tau_1))) \
                        + np.sum(norm.logcdf(np.sqrt(alpha_1)*tau_2_N/np.sqrt(tau_1_N)))) \
                        + 0.5 * np.sum(np.power(tau_2, 2) / tau_1) \
                        + inv_alpha_1 * (N+D) * 0.5 * np.log(2 * np.pi)
    mat = A - np.diag(tau_1)
    sign, logdet = np.linalg.slogdet(mat)
    if (sign < 0) or np.isinf(logdet):
        #print 'sign = %s, logdet = %s, setting integral_2 to INF' % (sign, logdet)
        integral_2 = INF2
    else:
        try:
            integral_2 = inv_alpha_2 * (-0.5) * (
                -(D + N) * np.log(inv_alpha_2) + logdet) + 0.5 * np.sum(
                    tau_2 * np.linalg.solve(mat, tau_2))
        except np.linalg.linalg.LinAlgError:
            integral_2 = INF2
        integral_2 += inv_alpha_2 * (N + D) * 0.5 * np.log(2 * np.pi)
    integral = integral_1 + integral_2
    #    print 'integral 41: integral_1 = %.3f, integral_2 = %.3f, integral = %.3f' % (integral_1, integral_2, integral)
    return integral
Example #18
def calculate_p(z_scores: np.ndarray) -> np.ndarray:
    """
    Function that calculates P for the MAMA results

    :param z_scores: Z scores

    :return: P values for MAMA
             (as strings, to allow for very large negative exponents)
    """
    # Since P = 2 * normal_cdf(-|Z|), P = e ^ (log_normal_cdf(-|Z|) + ln 2)
    # This can be changed to base 10 as P = 10 ^ ((log_normal_cdf(-|Z|) + ln 2) / ln 10)
    log_10_p = RECIP_LN_10 * (norm.logcdf(-np.abs(z_scores)) + LN_2)

    # Break up the log based 10 of P values into the integer and fractional part
    # To handle the case of Z = 0 (and not result in "10e-1"), set initial values to (-1.0, 1.0)
    frac_part, int_part = np.full_like(z_scores,
                                       -1.0), np.full_like(z_scores, 1.0)
    np.modf(log_10_p, out=(frac_part, int_part), where=(z_scores != 0.0))

    # Construct strings for the P values
    # 1) Add one to the fractional part to ensure that the result mantissa is between 1 and 10
    # 2) Subtract one from the integer part to compensate and keep the overall value correct
    result = np.char.add(
        np.char.add(np.power(10.0, (frac_part + 1.0)).astype(str), 'e'),
        (int_part - 1).astype(int).astype(str))

    return result
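A quick check of the log-domain identity in the comment above (assuming RECIP_LN_10 = 1/ln 10 and LN_2 = ln 2): for |Z| = 40 the two-sided p-value is about 10**-349.1, far below what a float64 can represent, which is why the p-values are assembled as strings:

import numpy as np
from scipy.stats import norm

z = 40.0
log10_p = (norm.logcdf(-abs(z)) + np.log(2.0)) / np.log(10.0)
print(log10_p)                  # roughly -349.1
print(2.0 * norm.cdf(-abs(z)))  # 0.0: the direct computation underflows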
Example #19
def q2qnbinom(counts, input_mean, output_mean, dispersion):
    """ Quantile to Quantile for a negative binomial
    """
    zero = logical_or(input_mean < 1e-14, output_mean < 1e-14)
    input_mean[zero] = input_mean[zero] + 0.25
    output_mean[zero] = output_mean[zero] + 0.25
    ri = 1 + multiply(np.matrix(dispersion).T, input_mean)
    vi = multiply(input_mean, ri)
    rO = 1 + multiply(np.matrix(dispersion).T, output_mean)
    vO = multiply(output_mean, rO)
    i = counts >= input_mean
    low = logical_not(i)
    p1 = empty(counts.shape, dtype=np.float64)
    p2 = p1.copy()
    q1, q2 = p1.copy(), p1.copy()
    if i.any():
        p1[i] = norm.logsf(counts[i], loc=input_mean[i], scale=np.sqrt(vi[i]))[0, :]
        p2[i] = gamma.logsf(counts[i], (input_mean / ri)[i], scale=ri[i])[0, :]
        q1[i] = norm.ppf(1 - np.exp(p1[i]), output_mean[i], np.sqrt(vO[i]))[0, :]
        q2[i] = gamma.ppf(1 - np.exp(p2[i]), np.divide(output_mean[i], rO[i]), scale=rO[i])[0, :]

    if low.any():
        p1[low] = norm.logcdf(counts[low], loc=input_mean[low], scale=np.sqrt(vi[low]))[0, :]
        p2[low] = gamma.logcdf(counts[low], input_mean[low] / ri[low], scale=ri[low])[0, :]
        q1[low] = norm.ppf(np.exp(p1[low]), loc=output_mean[low], scale=np.sqrt(vO[low]))[0, :]
        q2[low] = gamma.ppf(np.exp(p2[low]), output_mean[low] / rO[low], scale=rO[low])[0, :]
    return (q1 + q2) / 2
Example #20
def marginal_pdf_distance(r, rmin, rmax, mu, sigma, mlim):
    """
    Calculate the expected marginal distribution of distances given the parallax survey parameters. The
    calculation is only approximate, as the magnitude limit is applied to the error-free true apparent
    magnitude.
    
    Parameters
    ----------
    
    r : float vector
        Values of r for which to calculate p(r).
    rmin : float
        Minimum distance in survey.
    rmax : float
        Maximum distance in survey.
    mu : float
        Mean of the true absolute magnitude distribution.
    sigma : float
        Standard deviation of the true absolute magnitude distribution.
    mlim : float
        Apparent magnitude limit of the survey.
        
    Returns
    -------
    
    p(r) as float vector.
    """
    A = rmax**3 - rmin**3
    pdf = lambda x: np.exp(
        np.log(3) - np.log(A) + 2 * np.log(x) + norm.logcdf(
            mlim - mu - 5 * np.log10(x) + 5, scale=sigma))
    C, dummy = quad(pdf, rmin, rmax)
    return pdf(r) / C
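A usage sketch with made-up survey parameters (the 5*log10(r) - 5 distance modulus in the code implies distances in parsecs):

import numpy as np
from scipy.stats import norm
from scipy.integrate import quad

r = np.linspace(1.0, 100.0, 5)
p_r = marginal_pdf_distance(r, rmin=1.0, rmax=100.0, mu=5.0, sigma=1.0, mlim=12.0)
print(p_r)   # normalised so that integrating p(r) over [rmin, rmax] gives 1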
Example #21
	def __init__(self, K, Y, init=None, threshold=1e-9):
		
		N = np.shape(K)[0]
		f = np.zeros((N,1))
		converged = False
		k = 0
		innerC = 0

		for i in range(N):
			pdfDiff = norm.logpdf(f) - norm.logcdf(Y*f)
			W = np.exp(2*pdfDiff) + Y*f*np.exp(pdfDiff)
			Wsqrt = np.sqrt(W)
			Wdiag= np.diag(Wsqrt.flatten())

			B = np.identity(N) + np.dot(Wdiag, np.dot(K, Wdiag))
			grad = Y*np.exp(pdfDiff)
			b = W*f + grad
			interim = np.dot(Wdiag, np.dot(K, b))

			cgRes = Cg(B, interim, threshold=threshold)
			s1 = cgRes.result
			innerC = innerC + cgRes.iterations
			a = b - Wsqrt*s1

			if(converged):
				break
			f_prev = f
			f = np.dot(K, a)
			diff = f - f_prev
			if (np.dot(diff.T,diff).flatten() < threshold*N or innerC>15000):
				converged = True
			k = k+1

		self.result = f
		self.iterations = k + innerC
Example #22
def upper_bound_logpartition(tau, inv_alpha_1):
    tau_1, tau_2 = tau[:D+N], tau[D+N:]
    tau_1_N, tau_2_N = tau_1[D:], tau_2[D:]     # first D values correspond to w
    alpha_1 = 1.0 / inv_alpha_1
    inv_alpha_2 = 1 - inv_alpha_1
#    if np.any(tau_1 <= 0.01):
#        print 'one of the tau_1 <= 0.01: setting integral_1 to INF'
    if np.any(tau_1 <= 0):
#        print 'one of the tau_1 <= 0: setting integral_1 to INF'
#    if np.any(tau_1 <= 0) or np.any(tau_1 > min_eigvals_A):
        integral_1 = INF2
    else:
        integral_1 = inv_alpha_1 * (-0.5 * ((D+N)*np.log(alpha_1) + np.sum(np.log(tau_1)) ) \
                        + np.sum(norm.logcdf(np.sqrt(alpha_1)*tau_2_N/np.sqrt(tau_1_N)))) \
                        + 0.5 * np.sum(np.power(tau_2, 2) / tau_1)
    mat = A - np.diag(tau_1)
    sign, logdet = np.linalg.slogdet(mat)
    if (sign <= 0) or np.isinf(logdet):
        print('sign = %s, logdet = %s, setting integral_2 to INF' % (sign, logdet))
        integral_2 = INF2
    else:
        try:
            integral_2 = -0.5 * inv_alpha_2 * (-(D+N)*np.log(inv_alpha_2) + logdet) \
                            + 0.5 * np.sum(tau_2 * np.linalg.solve(mat, tau_2))
        except np.linalg.linalg.LinAlgError:
            integral_2 = INF2
    integral = integral_1 + integral_2
    return integral
Example #23
def upper_bound_logpartition(tau, inv_alpha_1):
    tau_1, tau_2 = tau[:D+N], tau[D+N:]
    tau_1_N, tau_2_N = tau_1[D:], tau_2[D:]     # first D values correspond to w
    alpha_1 = 1.0 / inv_alpha_1
    inv_alpha_2 = 1 - inv_alpha_1
    if np.any(tau_1 <= 0):
#    if np.any(tau_1 <= 0) or np.any(tau_1 > min_eigvals_A):
        print('one of the tau_1 <= 0: setting integral_1 to INF')
        integral_1 = INF2
    else:
        integral_1 = inv_alpha_1 * (-0.5 * ((D+N)*np.log(alpha_1) + np.sum(np.log(tau_1)) ) \
                        + np.sum(norm.logcdf(np.sqrt(alpha_1)*tau_2_N/np.sqrt(tau_1_N)))) \
                        + 0.5 * np.sum(np.power(tau_2, 2) / tau_1)
    mat = A - np.diag(tau_1)
    sign, logdet = np.linalg.slogdet(mat)
    if (sign <= 0) or np.isinf(logdet):
        print('sign = %s, logdet = %s, setting integral_2 to INF' % (sign, logdet))
        integral_2 = INF2
    else:
        try:
            integral_2 = -0.5 * inv_alpha_2 * ((D+N)*np.log(inv_alpha_2) + logdet) \
                            + 0.5 * np.sum(tau_2 * np.linalg.solve(mat, tau_2))
        except np.linalg.linalg.LinAlgError:
            integral_2 = INF2
    integral = integral_1 + integral_2
    return integral
Example #24
    def predict(self, Y, X, parameter_sample):
        """Given a sample of (X,Y) as well as a sample of network parameters, 
        compute p_{\theta}(Y|X) and compare against the actual values of Y"""

        # first compute coefficients * X; shape (n, B)
        inner_products = np.matmul(X, np.transpose(parameter_sample))

        # Second, use the normal cdf to transform into [0,1]
        log_probs = norm.logcdf(inner_products)

        # compute predictions
        #predictions = (np.exp(log_probs) > 0.5).astype('float')
        # #print("[np.where(probs > 0.5)]", list(zip(np.where(mat > 0.5)[0], np.where(mat > 0.5)[1])).shape)
        # predictions = np.zeros((n,B))
        # tuples = np.where(np.exp(log_probs) > 0.5)
        # indices = np.array(list(zip(tuples[0], tuples[1])))
        # predictions[indices] = 1

        # use predictions for accuracy computation
        ae = np.abs(np.exp(log_probs) - Y[:, np.newaxis])

        # compute cross-entropy
        cross_entropy = -(log_probs * Y[:, np.newaxis] +
                          np.logaddexp(0, -log_probs) *
                          (1.0 - Y[:, np.newaxis]))

        return (log_probs, ae, cross_entropy)
Example #25
def MES(f_x: NDArray[float], pred_mu: NDArray[float], pred_var: NDArray[float],
        k: int) -> float:
    y_star = f_x.max(axis=0)
    y_sample = np.tile(y_star, (pred_mu.shape[0], 1))
    gamma_y = (y_sample.T - pred_mu) / np.sqrt(pred_var)  #gamma_y: D*K array
    print(gamma_y)
    print(np.mean(gamma_y, axis=1))
    print(np.var(gamma_y, axis=1))
    fig = plt.figure(figsize=(20, 10))
    ax1 = fig.add_subplot(2, 2, 1)
    plt.title("distribution")
    ax1.plot(X.ravel(), y_sample.T.ravel(), "g", label="y_star")
    ax1.plot(X.ravel(), pred_mu, "b", label="pred_mu")
    ax1.plot(X.ravel(), ((y_sample.T - pred_mu.ravel())).ravel(),
             "r",
             label="y_star-pred_mu")
    ax1.legend(loc="lower left", prop={'size': 8})
    ax2 = fig.add_subplot(2, 2, 2)
    ax2.plot(X.ravel(), gamma_y.ravel(), "r", label="gamma_y")
    ax2.legend(loc="upper left", prop={'size': 8})
    ax3 = fig.add_subplot(2, 2, 3)
    #ax3.plot(X.ravel(),((y_sample.T-pred_mu.ravel())).ravel(),"r",label="pred_mu-pred_var")
    ax3.plot(X.ravel(), np.sqrt(pred_var), "g", label="sqrt(pred_var)")
    ax3.legend(loc="lower left", prop={'size': 8})
    plt.savefig(result_dir_path + savefig_pass + str(seed) + "/gamma_y" +
                ".pdf")
    #plt.show()
    plt.close()
    #sys.exit()
    psi_gamma = norm.pdf(gamma_y, loc=pred_mu, scale=np.sqrt(pred_var))
    fig = plt.figure(figsize=(20, 10))
    ax1 = fig.add_subplot(2, 2, 1)
    plt.title("distribution")
    ax1.plot(X.ravel(), gamma_y.ravel(), "r", label="gamma_y")
    ax1.legend(loc="upper left", prop={'size': 8})
    ax2 = fig.add_subplot(2, 2, 2)
    ax2.plot(X.ravel(), psi_gamma.ravel(), "r", label="psi_gamma")
    ax2.legend(loc="upper left", prop={'size': 8})
    large_psi_gamma = norm.cdf(gamma_y, loc=pred_mu, scale=np.sqrt(pred_var))
    ax3 = fig.add_subplot(2, 2, 3)
    ax3.plot(X.ravel(), large_psi_gamma.ravel(), "r", label="large_psi_gamma")
    ax3.legend(loc="lower left", prop={'size': 8})
    ax4 = fig.add_subplot(2, 2, 4)
    log_large_psi_gamma = norm.logcdf(gamma_y,
                                      loc=pred_mu,
                                      scale=np.sqrt(pred_var))
    ax4.plot(X.ravel(),
             log_large_psi_gamma.ravel(),
             "r",
             label="log_large_psi_gamma")
    ax4.legend(loc="lower left", prop={'size': 8})
    temp = np.divide((gamma_y * psi_gamma), (2 * large_psi_gamma),
                     out=np.zeros_like(gamma_y * psi_gamma),
                     where=(2 * large_psi_gamma) != 0) - log_large_psi_gamma
    alpha = np.sum(temp, axis=0) / k
    plt.savefig(result_dir_path + savefig_pass + str(seed) + "/distribution_" +
                ".pdf")
    plt.close()
    return alpha
Example #26
def sep_logpdf(x, mu=0., sigma=1., nu=0, tau=2):

    z = (x - mu) / sigma
    w = np.sign(z) * np.abs(z)**(tau / 2) * nu * np.sqrt(2. / tau)
    # Note: There is a sigma division in the paper
    logp = np.log(2) + norm.logcdf(w) + ep2_logpdf(x, mu, sigma, tau)

    return logp
Example #27
def log_probability_via_sampling(means: np.ndarray, stdevs: np.ndarray,
                                 n_draws: int) -> np.ndarray:

    # TODO: Currently expects means and stdevs to be 1D. Maybe could do n-d.
    # TODO: This could really do with the odd unit test.

    draws = np.random.normal(means, stdevs, size=(n_draws, means.shape[0]))

    # OK, now to do the logsumexp trick.
    pre_factor = -np.log(n_draws)
    presence_log_probs = norm.logcdf(draws)
    absence_log_probs = norm.logcdf(-draws)

    presence_results = logsumexp(pre_factor + presence_log_probs, axis=0)
    absence_results = logsumexp(pre_factor + absence_log_probs, axis=0)

    return np.stack([absence_results, presence_results], axis=1)
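A usage sketch (assuming the snippet's imports of norm and logsumexp are in scope); each row of the result holds log P(absence) and log P(presence) for one mean/stdev pair:

import numpy as np

means = np.array([-1.0, 0.0, 2.0])
stdevs = np.array([0.5, 1.0, 0.2])
log_probs = log_probability_via_sampling(means, stdevs, n_draws=10000)
print(log_probs.shape)   # (3, 2)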
Example #28
    def _logcdf(self, x, stddev_ratio):

        norm_arg = self._norm_pdf_arg(x, stddev_ratio)
        return numpy.where(
            x < 0,
            (numpy.log(2.0 / (stddev_ratio + 1)) + norm.logcdf(norm_arg)),
            numpy.log((1.0 + stddev_ratio * (2.0 * norm.cdf(norm_arg) - 1.0)) /
                      (stddev_ratio + 1)))
Example #29
    def log_likelihood(self, smis, **targets):
        def _avoid_overflow(ll_):
            # log(exp(log(UP) - log(C)) - exp(log(LOW) - log(C))) + log(C)
            # where C = max(log(UP), log(LOW))
            ll_c = np.max(ll_)
            ll_ = np.log(np.exp(ll_[1] - ll_c) - np.exp(ll_[0] - ll_c)) + ll_c
            return ll_

        # self.update_targets(reset=False, **targets):
        for k, v in targets.items():
            if not isinstance(v, tuple) or len(v) != 2 or v[1] <= v[0]:
                raise ValueError('must be a tuple with (low, up) boundary')
            self._targets[k] = v

        if not self._targets:
            raise RuntimeError('<targets> is empty')

        ll = pd.DataFrame(np.full((len(smis), len(self._mdl)), -1000.0),
                          columns=self._mdl.keys())
        pred = self.predict(smis).reset_index(drop=True)
        tmp = pred.isna().any(axis=1)
        idx = [i for i in range(len(smis)) if ~tmp[i]]

        # calculate likelihood
        for k, (low, up) in self._targets.items():  # k: target; v: (low, up)

            # predict mean, std for all smiles
            mean, std = pred[k + ': mean'], pred[k + ': std']

            # calculate low likelihood
            low_ll = norm.logcdf(low,
                                 loc=np.asarray(mean),
                                 scale=np.asarray(std))

            # calculate up likelihood
            up_ll = norm.logcdf(up,
                                loc=np.asarray(mean),
                                scale=np.asarray(std))

            # zip low and up likelihood to a 1-dim array then save it.
            # like: [(tar_low_smi1, tar_up_smi1),  (tar_low_smi2, tar_up_smi2), ..., (tar_low_smiN, tar_up_smiN)]
            lls = zip(low_ll, up_ll)
            ll[k].iloc[idx] = np.array([*map(_avoid_overflow, list(lls))])

        return ll
Example #30
 def loglik(mu):
     ll = -sum(norm.logpdf(values[not_censored], loc=mu, scale=std_est))
     if n_left_cens > 0:
         ll -= sum(
             norm.logcdf(values[left_censored], loc=mu, scale=std_est))
     if n_right_cens > 0:
         ll -= sum(
             norm.logsf(values[right_censored], loc=mu, scale=std_est))
     return ll
Example #31
def upper_bound_logpartition_43(tau, inv_alpha_1):
    tau_1, tau_2 = tau[:D + N], tau[D + N:]
    tau_1_N, tau_2_N = tau_1[D:], tau_2[D:]  # first D values correspond to w
    alpha_1 = 1.0 / inv_alpha_1
    inv_alpha_2 = 1 - inv_alpha_1
    alpha_2 = 1. / inv_alpha_2
    use_exact_integral_1 = True
    use_exact_integral_2 = True
    if use_exact_integral_1:
        if np.any(tau_1 <= 0.01):
            #print 'one of the tau_1 <= 0.01/inv_alpha_1: setting integral_1 to INF'
            integral_1 = INF2
        else:
            integral_1 = inv_alpha_1 * (-0.5 * ((D+N)*np.log(alpha_1) + np.sum(np.log(tau_1))) \
                            + np.sum(norm.logcdf(np.sqrt(alpha_1)*tau_2_N/np.sqrt(tau_1_N)))) \
                            + 0.5 * np.sum(np.power(tau_2, 2) / tau_1) \
                            + inv_alpha_1 * (N+D) * 0.5 * np.log(2 * np.pi)
    else:
        L = np.ones(
            N) * 0.01  # to avoid integrating the step function over reals
        ints = np.zeros(N)
        tau_1_mod = alpha_1 * tau_1
        tau_2_mod = alpha_1 * tau_2
        for i in range(N):
            if L[i] / inv_alpha_1 < tau_1_mod[
                    i]:  # numerical check that the integral is finite
                ints[i] = np.log(
                    quad(
                        lambda t: np.exp(
                            log_step_func(t) / inv_alpha_1 - 0.5 * tau_1_mod[i]
                            * np.power(t, 2) + tau_2_mod[i] * t), -np.inf,
                        np.inf)[0])
                #ints[i] = np.log(quad(lambda t: np.exp(- 0.5*tau_1_mod[i]*np.power(t, 2) + tau_2_mod[i]*t),-np.inf,np.inf)[0])
            else:
                ints[i] = np.inf
                break
        integral_1 = inv_alpha_1 * np.sum(ints)
    if use_exact_integral_2:
        mat = A - np.diag(tau_1)
        sign, logdet = np.linalg.slogdet(mat)
        if (sign < 0) or np.isinf(logdet):
            #print 'sign = %s, logdet = %s, setting integral_2 to INF' % (sign, logdet)
            integral_2 = INF2
        else:
            try:
                integral_2 = inv_alpha_2 * (-0.5) * (
                    -(D + N) * np.log(inv_alpha_2) + logdet) + 0.5 * np.sum(
                        tau_2 * np.linalg.solve(mat, tau_2))
            except np.linalg.linalg.LinAlgError:
                integral_2 = INF2
            integral_2 += inv_alpha_2 * (N + D) * 0.5 * np.log(2 * np.pi)
    else:
        integral_2 = inv_alpha_2 * gauss_integral(
            alpha_2 * (A - np.diag(tau_1)), -alpha_2 * tau_2)
    integral = integral_1 + integral_2
    #    print 'integral 43: integral_1 = %.3f, integral_2 = %.3f, integral = %.3f' % (integral_1, integral_2, integral)
    return integral
Example #32
    def log_likelihood(self, smis, **targets):
        def _avoid_overflow(ll_):
            # log(exp(log(UP) - log(C)) - exp(log(LOW) - log(C))) + log(C)
            # where C = max(log(UP), log(LOW))
            ll_c = np.max(ll_)
            ll_ = np.log(np.exp(ll_[1] - ll_c) - np.exp(ll_[0] - ll_c)) + ll_c
            return ll_

        ll = np.repeat(-1000.0, len(smis))
        tar_fps = self._descriptor.transform(smis)
        tar_fps = pd.DataFrame(data=tar_fps).reset_index(drop=True)
        tmp = tar_fps.isna().any(axis=1)
        idx = [i for i in range(len(smis)) if ~tmp[i]]
        tar_fps.dropna(inplace=True)

        # calculate likelihood
        ll_mat = []
        for k, (low, up) in targets.items():  # k: target; v: (low, up)

            # predict mean, std for all smiles
            mean, std = self._mdl[k].predict(tar_fps, return_std=True)

            # calculate low likelihood
            low_ll = norm.logcdf(low,
                                 loc=np.asarray(mean),
                                 scale=np.asarray(std))

            # calculate up likelihood
            up_ll = norm.logcdf(up,
                                loc=np.asarray(mean),
                                scale=np.asarray(std))

            # zip low and up likelihood to a 1-dim array then save it.
            # like: [(tar_low_smi1, tar_up_smi1),  (tar_low_smi2, tar_up_smi2), ..., (tar_low_smiN, tar_up_smiN)]
            lls = zip(low_ll, up_ll)
            ll_mat.append(list(lls))

        # sum all ll along each smiles
        # ll_sum = [[sum_low_smi1, sum_up_smi1], [sum_low_smi2, sum_up_smi2],...,[sum_low_smiN, sum_up_smiN],]
        ll_sum = np.sum(np.array(ll_mat), axis=0)
        tmp = np.array([*map(_avoid_overflow, ll_sum)])

        np.put(ll, idx, tmp)
        return ll
Example #33
    def _get_f_map(self):
        """Computes maximum a posterior (MAP) evaluation of f given the data using Newton's method
        
        Returns: 
            MAP of the Gassian processes values at current datapoints
        """
        converged = False
        try_no = 0

        f_map = None

        # Newton's method to approximate f_MAP
        while not converged and try_no < 1:

            # randomly initialise f_map
            f_map = self.random_state.uniform(0., 1., self.datapoints.shape[0])

            for m in range(100):
                # compute Z
                f_sup = np.array([
                    f_map[self.comparisons[i, 0]]
                    for i in range(self.comparisons.shape[0])
                ])
                f_inf = np.array([
                    f_map[self.comparisons[i, 1]]
                    for i in range(self.comparisons.shape[0])
                ])
                Z = self._get_Z(f_sup, f_inf)
                Z_logpdf = norm.logpdf(Z)
                Z_logcdf = norm.logcdf(Z)

                # compute b
                b = self._get_b(Z_logpdf, Z_logcdf)

                # compute gradient g
                g = self._get_g(f_map, b)

                # compute hessian H
                C = self._get_C(Z)
                H = -self.K_inv + C
                H_inv = self._get_inv(H)

                # perform update
                update = np.dot(H_inv, g)
                f_map -= update

                # stop criterion
                if np.linalg.norm(update) < 0.0001:
                    converged = True
                    break

            if not converged:
                print("Did not converge.")
                try_no += 1

        return f_map
Example #34
def log_target(be, x, constraints_funcs):
    """
    log(Phi(be * g(x))), summed over all constraint functions g.

    Parameters
    ----------
    be : float
        Scale factor (beta) applied to the constraint values.
    x : array-like
        Point at which the constraints are evaluated.
    constraints_funcs : iterable of callables
        The constraint functions g.
    """
    return np.sum([norm.logcdf(be * g(x)) for g in constraints_funcs])
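A usage sketch, assuming the common convention that g(x) >= 0 means a satisfied constraint, so the value approaches log(1) = 0 when every constraint holds comfortably:

import numpy as np
from scipy.stats import norm

constraints = [lambda x: x[0] - 1.0, lambda x: 2.0 - x[0]]
print(log_target(10.0, np.array([1.5]), constraints))   # close to 0: both constraints satisfied
print(log_target(10.0, np.array([0.5]), constraints))   # strongly negative: first constraint violated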
Example #35
 def impl(self, logEI, glogEI, diff, sigma):
     z = diff / sigma
     if z < 34:
         logcdf = norm.logcdf(-z, 0, 1)
         ddiff = -np.exp(logcdf - logEI)     # aka: -cdf / EI
     else:
         foo = 2 * .49903031
         dz = (-0.12442506 - foo * z)
         ddiff = dz / sigma
     return ddiff * glogEI
Example #36
    def evaluate(self, x: np.ndarray) -> np.ndarray:
        """
        Evaluates the penalization function value
        """

        if self.x_batch is None:
            return np.ones((x.shape[0], 1))

        distances = _distance_calculation(x, self.x_batch)
        normalized_distance = (distances - self.radius) / self.scale
        return norm.logcdf(normalized_distance).sum(axis=1, keepdims=True)
Example #37
    def evaluate_with_gradients(self, x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Evaluates the penalization function value and gradients with respect to x
        """

        if self.x_batch is None:
            return np.ones((x.shape[0], 1)), np.zeros(x.shape)

        distances, d_dist_dx = _distance_with_gradient(x, self.x_batch)
        normalized_distance = (distances - self.radius) / self.scale
        h_func = norm.cdf(normalized_distance)
        d_value_dx = 0.5 * (1 / h_func[:, :, None]) \
                     * norm.pdf(normalized_distance)[:, :, None] \
                     * d_dist_dx / self.scale[None, :, None]
        return norm.logcdf(normalized_distance).sum(1, keepdims=True), d_value_dx.sum(1)
Example #38
def upper_bound_logpartition_43(tau, inv_alpha_1):
    tau_1, tau_2 = tau[:D+N], tau[D+N:]
    tau_1_N, tau_2_N = tau_1[D:], tau_2[D:]     # first D values correspond to w
    alpha_1 = 1.0 / inv_alpha_1
    inv_alpha_2 = 1 - inv_alpha_1
    alpha_2 = 1. / inv_alpha_2
    use_exact_integral_1 = True
    use_exact_integral_2 = True
    if use_exact_integral_1:
        if np.any(tau_1 <= 0.01):
            #print 'one of the tau_1 <= 0.01/inv_alpha_1: setting integral_1 to INF'
            integral_1 = INF2
        else:
            integral_1 = inv_alpha_1 * (-0.5 * ((D+N)*np.log(alpha_1) + np.sum(np.log(tau_1))) \
                            + np.sum(norm.logcdf(np.sqrt(alpha_1)*tau_2_N/np.sqrt(tau_1_N)))) \
                            + 0.5 * np.sum(np.power(tau_2, 2) / tau_1) \
                            + inv_alpha_1 * (N+D) * 0.5 * np.log(2 * np.pi)
    else:
        L = np.ones(N) * 0.01 # to avoid integrating the step function over reals
        ints = np.zeros(N)
        tau_1_mod = alpha_1 * tau_1
        tau_2_mod = alpha_1 * tau_2
        for i in range(N):
            if L[i]/inv_alpha_1 < tau_1_mod[i]:  # numerical check that the integral is finite      
                ints[i] = np.log(quad(lambda t: np.exp(log_step_func(t)/inv_alpha_1 - 0.5*tau_1_mod[i]*np.power(t, 2) + tau_2_mod[i]*t),-np.inf,np.inf)[0])
                #ints[i] = np.log(quad(lambda t: np.exp(- 0.5*tau_1_mod[i]*np.power(t, 2) + tau_2_mod[i]*t),-np.inf,np.inf)[0])
            else:   
                ints[i] = np.inf
                break
        integral_1 = inv_alpha_1 * np.sum(ints)
    if use_exact_integral_2:
        mat = A - np.diag(tau_1)
        sign, logdet = np.linalg.slogdet(mat)
        if (sign < 0) or np.isinf(logdet):
            #print 'sign = %s, logdet = %s, setting integral_2 to INF' % (sign, logdet)
            integral_2 = INF2
        else:
            try:
                integral_2 = inv_alpha_2 * (-0.5) * (-(D+N)*np.log(inv_alpha_2) + logdet) + 0.5 * np.sum(tau_2 * np.linalg.solve(mat, tau_2))
            except np.linalg.linalg.LinAlgError:
                integral_2 = INF2
            integral_2 += inv_alpha_2 * (N+D) * 0.5 * np.log(2 * np.pi)
    else:
        integral_2 = inv_alpha_2 * gauss_integral(alpha_2 * (A - np.diag(tau_1)), -alpha_2 * tau_2)
    integral = integral_1 + integral_2
#    print 'integral 43: integral_1 = %.3f, integral_2 = %.3f, integral = %.3f' % (integral_1, integral_2, integral)
    return integral
Example #39
    def perform(self, node, inputs, output_storage):
        logEI, gEI, diff, sigma = inputs
        z = diff / sigma
        logcdf = norm.logcdf(-z, 0, 1)
        logpdf = norm.logpdf(-z, 0, 1)
        #for zi, a, b, c in zip(z, logcdf, logpdf, logEI):
            #print zi, 'cdf', a, 'pdf', b, 'EI', c, 'logdz', a - c, 'logsig', b - c
        dz = -np.exp(logcdf - logEI)     # aka: -cdf / EI
        dsigma = np.exp(logpdf - logEI)  # aka: pdf / EI

        #if np.any(z > 20):
        #    print 'NormalLogEIGrad: bigz', z[z > 20]

        foo = 2 * .49903031
        dz[z > 34] = -0.12442506 - foo * z[z > 34]
        dsigma[z > 34] = dz[z > 34] * (-z[z > 34] / sigma[z > 34])
        dz[z > 34] /= sigma[z > 34]

        output_storage[0][0] = dz * gEI
        output_storage[1][0] = dsigma * gEI
Example #40
def pnorm(x, mean=0, sd=1, lowertail=True, log=False):
    """
    ============================================================================
                                                                        pnorm()
    ============================================================================
    The cumulative distribution function for the normal distribution.
    You provide a value along the normal distribution (e.g. x=3), or an array of
    values, and it returns the proportion of values that lie below it.

    Alternatively, if you select lowertail=False, it returns the proportion of
    values that are above it.

    USAGE:
    cnorm(mean=0, sd=1, type="equal", conf=0.95)
    dnorm(x, mean=0, sd=1, log=False)
    pnorm(q, mean=0, sd=1, lowertail=True, log=False)
    qnorm(p, mean=0, sd=1, lowertail=True, log=False)
    rnorm(n=1, mean=0, sd=1)

    :param x (float, array of floats): The values along the distribution.
    :param mean (float):     mean of the distribution
    :param sd (float):       standard deviation
    :param lowertail (bool): are you interested in what proportion of values
                             lie beneath x? or above x (false)?
    :param log (bool):       take the log?
    :return:        an array of probabilities corresponding to the values in x
    ============================================================================
    """
    if lowertail and not log:
        return norm.cdf(x, loc=mean, scale=sd)
    elif not lowertail and not log:
        return norm.sf(x, loc=mean, scale=sd)
    elif lowertail and log:
        return norm.logcdf(x, loc=mean, scale=sd)
    else:
        return norm.logsf(x, loc=mean, scale=sd)
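A few example calls matching the four branches above:

from scipy.stats import norm

pnorm(1.96)                              # ~0.975, lower-tail probability (norm.cdf)
pnorm(1.96, lowertail=False)             # ~0.025, upper tail (norm.sf)
pnorm(1.96, log=True)                    # log of the lower-tail probability (norm.logcdf)
pnorm(1.96, lowertail=False, log=True)   # log of the upper tail (norm.logsf)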
Example #41
def lnlike_limit(theta, x_limit, y_limit, yerr_limit=0.1):
    """Non-detections."""
    m, b = theta
    model = m * x_limit + b
    return np.sum(norm.logcdf(model - y_limit, scale=yerr_limit))
Example #42
    def compute(self, X, derivative=False, **kwargs):

        """
        A call to the object returns the log(EI) and derivative values.

        :param X: The point at which the function is to be evaluated.
        :type X: np.ndarray (1,D)
        :param incumbent: The current incumbent
        :type incumbent: np.ndarray (1,D)
        :param derivative: This controls whether the derivative is to be returned.
        :type derivative: Boolean
        :return: The value of log(EI)
        :rtype: np.ndarray(1, 1)
        :raises BayesianOptimizationError: if X.shape[0] > 1. Only single X can be evaluated.
        """
        if derivative:
            print("LogEI does not support derivative calculation until now")
            return

        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            return np.array([[-np.finfo(np.float64).max]])
        m, v = self.model.predict(X)

        incumbent, _ = self.compute_incumbent(self.model)
        eta, _ = self.model.predict(np.array([incumbent]))

        f_min = eta - self.par

        s = np.sqrt(v)

        z = (f_min - m) / s

        log_ei = np.zeros((m.size, 1))
        for i in range(0, m.size):
            mu, sigma = m[i], s[i]

        #    par_s = self.par * sigma

            # Degenerate case 1: first term vanishes
            if np.any(abs(f_min - mu)) == 0:
                if sigma > 0:
                    log_ei[i] = np.log(sigma) + norm.logpdf(z[i])
                else:
                    log_ei[i] = -np.Infinity
            # Degenerate case 2: second term vanishes and first term has a special form.
            elif sigma == 0:
                if mu < np.any(f_min):
                    log_ei[i] = np.log(f_min - mu)
                else:
                    log_ei[i] = -np.Infinity
            # Normal case
            else:
                b = np.log(sigma) + norm.logpdf(z[i])
                # log(y+z) is tricky, we distinguish two cases:
                if np.any(f_min > mu):
                    # When y>0, z>0, we define a=ln(y), b=ln(z).
                    # Then y+z = exp[ max(a,b) + ln(1 + exp(-|b-a|)) ],
                    # and thus log(y+z) = max(a,b) + ln(1 + exp(-|b-a|))
                    a = np.log(f_min - mu) + norm.logcdf(z[i])

                    log_ei[i] = max(a, b) + np.log(1 + np.exp(-abs(b - a)))
                else:
                    # When y<0, z>0, we define a=ln(-y), b=ln(z), and it has to be true that b >= a in order to satisfy y+z>=0.
                    # Then y+z = exp[ b + ln(exp(b-a) -1) ],
                    # and thus log(y+z) = a + ln(exp(b-a) -1)
                    a = np.log(mu - f_min) + norm.logcdf(z[i])
                    if a >= b:
                        # a>b can only happen due to numerical inaccuracies or approximation errors
                        log_ei[i] = -np.Infinity
                    else:
                        log_ei[i] = b + np.log(1 - np.exp(a - b))

        return log_ei
Example #43
def _pln_logpdf(x, alpha, nu, tau2):
    return np.log(alpha) + alpha * nu + alpha * tau2 / 2 - \
        (alpha + 1) * np.log(x) + \
        norm.logcdf((np.log(x) - nu - alpha * tau2) / np.sqrt(tau2))
Example #44
 def log_cdf(self, s):
     return norm.logcdf(s, loc=self.mu, scale=self.std)
Example #45
    def compute(self, X, derivative=False, **kwargs):
        """
        Computes the Log EI value and its derivatives.

        Parameters
        ----------
        X: np.ndarray(1, D), The input point where the acquisition function
            should be evaluate. The dimensionality of X is (N, D), with N as
            the number of points to evaluate at and D is the number of
            dimensions of one X.

        derivative: Boolean
            If is set to true also the derivative of the acquisition
            function at X is returned
            Not implemented yet!

        Returns
        -------
        np.ndarray(1,1)
            Log Expected Improvement of X
        np.ndarray(1,D)
            Derivative of Log Expected Improvement at X
            (only if derivative=True)
        """
        if derivative:
            logger.error("LogEI does not support derivative \
                calculation until now")
            return

        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            return np.array([[-np.finfo(np.float64).max]])
        m, v = self.model.predict(X)

        _, eta = self.rec.estimate_incumbent(None)

        f_min = eta - self.par

        s = np.sqrt(v)

        z = (f_min - m) / s

        log_ei = np.zeros((m.size, 1))
        for i in range(0, m.size):
            mu, sigma = m[i], s[i]

        #    par_s = self.par * sigma

            # Degenerate case 1: first term vanishes
            if np.any(abs(f_min - mu)) == 0:
                if sigma > 0:
                    log_ei[i] = np.log(sigma) + norm.logpdf(z[i])
                else:
                    log_ei[i] = -np.Infinity
            # Degenerate case 2: second term vanishes and first term
            # has a special form.
            elif sigma == 0:
                if mu < np.any(f_min):
                    log_ei[i] = np.log(f_min - mu)
                else:
                    log_ei[i] = -np.Infinity
            # Normal case
            else:
                b = np.log(sigma) + norm.logpdf(z[i])
                # log(y+z) is tricky, we distinguish two cases:
                if np.any(f_min > mu):
                    # When y>0, z>0, we define a=ln(y), b=ln(z).
                    # Then y+z = exp[ max(a,b) + ln(1 + exp(-|b-a|)) ],
                    # and thus log(y+z) = max(a,b) + ln(1 + exp(-|b-a|))
                    a = np.log(f_min - mu) + norm.logcdf(z[i])

                    log_ei[i] = max(a, b) + np.log(1 + np.exp(-abs(b - a)))
                else:
                    # When y<0, z>0, we define a=ln(-y), b=ln(z),
                    # and it has to be true that b >= a in
                    # order to satisfy y+z>=0.
                    # Then y+z = exp[ b + ln(exp(b-a) -1) ],
                    # and thus log(y+z) = a + ln(exp(b-a) -1)
                    a = np.log(mu - f_min) + norm.logcdf(z[i])
                    if a >= b:
                        # a>b can only happen due to numerical inaccuracies
                        # or approximation errors
                        log_ei[i] = -np.Infinity
                    else:
                        log_ei[i] = b + np.log(1 - np.exp(a - b))

        return log_ei
Example #46
 def _hammer_function(self, x,x0,r_x0, s_x0):
     '''
     Creates the function to define the exclusion zones
     '''
     return norm.logcdf((np.sqrt((np.square(np.atleast_2d(x)[:,None,:]-np.atleast_2d(x0)[None,:,:])).sum(-1))- r_x0)/s_x0)
Example #47
def rma_bg_correct(Y, make_copy = False):
    """RMA background correction.

    Parameters
    ----------
    Y: np.ndarray (ndim = 1, dtype = np.float32)
        The microarray intensity values (on a linear scale).
    make_copy: bool
        Whether to make a copy of the data or modify it in-place.
    """
    assert isinstance(Y, np.ndarray)
    
    if make_copy:
        Y = Y.copy()
    
    n = Y.shape[1]
    
    for j in range(n):
        
        # find missing data (= NaN)
        missing = np.isnan(Y[:,j])
        y = Y[~missing,j]
        
        ### estimate mu using simple binning (histogram)

        # use a fixed number of bins
        num_bins = 100
        lower = np.amin(y)
        upper = np.percentile(y, 75.0)
        bin_width = max(floor((upper - lower) / num_bins), 1.0)
        bin_edges = np.arange(lower, upper, bin_width)
        num_bins = bin_edges.size - 1
        
        # binning
        binned = np.digitize(y, bins = bin_edges) - 1
        binned = binned[binned < num_bins]
        bc = np.bincount(binned)
        amax = np.argmax(bc)
        max_x = lower + (amax + 0.5) * bin_width
        mu = max_x
        logger.debug('Mu: %.2f', mu)

        ### estimate sigma

        # 1. Select probes with values smaller than mu
        y_low = y[y < mu]
        # 2. Estimate their standard deviation (using mu as the mean)
        sigma = pow(np.sum(np.power(y_low - mu, 2.0)) / (y_low.size - 1), 0.5)
        # 3. Arbitrarily multiply standard deviation by square root of two
        sigma *= pow(2.0, 0.5)
        logger.debug('Sigma: %.2f', sigma)

        ### estimate alpha

        # we simply fix alpha to 0.03
        alpha = 0.03

        ### calculate background-corrected intensities
        a = y - mu - alpha * pow(sigma, 2.0)
        y_adj = a + sigma * np.exp(norm.logpdf(a / sigma) - norm.logcdf(a / sigma))
        Y[~missing,j] = y_adj

    return Y
Example #48
def log_ZS_naive(params, trials=1000):
    matrix, mu, Ne = params
    nu = Ne - 1
    L = len(matrix)
    acc = 0
    for i in range(trials):
        ep = score_seq(matrix, random_site(L))
        acc += (1/(1+exp(ep-mu)))**(Ne-1)
    mean_Zs = acc / trials
    return L * log(4) + log(mean_Zs)

def log_ZM_naive(params, N, trials=1000):
    return N * log_ZS_naive(params, trials=trials)
    
def log_ZS_hack(params, N):
    matrix, mu, Ne = params
    L = len(matrix)
    mat_mu = sum(map(mean,matrix))
    mat_sigma = sqrt(sum(map(lambda xs:variance(xs,correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne-1)), mat_mu, mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    return log_Zs

def log_ZM_hack(params, N):
    log_ZS = log_ZS_hack(params, N)
    return N * log_ZS

def log_Z_hack(params, N):
    matrix, mu, Ne = params
    L = len(matrix)
    mat_mu = sum(map(mean,matrix))
    mat_sigma = sqrt(sum(map(lambda xs:variance(xs,correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne-1)), mat_mu, mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    ans_ref = ((N*L * log(4)) +  log_perc_below_threshold)
    ans = N * log_Zs
Example #49
def log_mog_cdf(w, k_vec, mu_vec, sigma_vec):
    eps = 1.e-300
    exp_term = norm.logcdf(w, loc = mu_vec, scale = sigma_vec+eps)
    coefficients = k_vec
    return logsumexp(exp_term, b = coefficients)
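A usage sketch with made-up mixture parameters (weights summing to 1); exponentiating the result recovers the ordinary mixture CDF:

import numpy as np
from scipy.stats import norm
from scipy.special import logsumexp

k = np.array([0.3, 0.7])
mu = np.array([-1.0, 2.0])
sd = np.array([0.5, 1.5])
print(np.exp(log_mog_cdf(0.0, k, mu, sd)))           # mixture CDF at w = 0
print(np.sum(k * norm.cdf(0.0, loc=mu, scale=sd)))   # same value, computed directly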