def get_logdelta_ana_gaussian(sigma, eps):
    """Calculate log(delta) for the analytical Gaussian mechanism at a given eps."""
    assert eps >= 0
    s, mag = utils.stable_log_diff_exp(
        norm.logcdf(0.5 / sigma - eps * sigma),
        eps + norm.logcdf(-0.5 / sigma - eps * sigma))
    return mag
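# utils.stable_log_diff_exp above is an external helper; a minimal sketch
# consistent with the call site (it must return a sign flag and
# log|exp(a) - exp(b)|) might look like this -- an assumption, not the
# library's actual implementation:
import numpy as np

def stable_log_diff_exp(a, b):
    # compute log|exp(a) - exp(b)| without overflow, plus the sign of the difference
    if a > b:
        return True, a + np.log1p(-np.exp(b - a))
    return False, b + np.log1p(-np.exp(a - b))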
def loglikeobs(self, params_all):
    """
    Log-likelihood of model.

    Parameters
    ----------
    params_all : array-like
        Parameter estimates, with the parameters for the regression
        equation coming first, then the parameters for the selection
        equation, then log sigma, then atanh rho.

    Returns
    -------
    ll_obs : ndarray
        The value of the log-likelihood function for a Heckman correction
        model, per observation.
    """
    # set up data and parameters needed to compute log likelihood
    Y, X, Z = self.get_datamats()
    D = self.treated
    num_xvars = X.shape[1]
    num_zvars = Z.shape[1]

    xbeta = np.asarray(params_all[:num_xvars])  # regression eqn coefs
    zbeta = np.asarray(params_all[num_xvars:num_xvars + num_zvars])  # selection eqn coefs
    log_sigma = params_all[-2]
    atanh_rho = params_all[-1]

    sigma = np.exp(log_sigma)
    rho = np.tanh(atanh_rho)

    # line the data vectors up
    Z_zbeta_aligned = Z.dot(zbeta)

    X_xbeta = X.dot(xbeta)
    X_xbeta_aligned = np.empty(self.nobs_total)
    X_xbeta_aligned[:] = np.nan
    X_xbeta_aligned[D] = X_xbeta
    del X_xbeta

    Y_aligned = np.empty(self.nobs_total)
    Y_aligned[:] = np.nan
    Y_aligned[D] = Y

    # create an array where each row is the log likelihood for the
    # corresponding observation
    norm_cdf_input = (Z_zbeta_aligned +
                      (Y_aligned - X_xbeta_aligned) * rho / sigma) / np.sqrt(1 - rho**2)
    norm_cdf_input[~D] = 0  # dummy value

    ll_obs_observed = np.multiply(
        D,
        norm.logcdf(norm_cdf_input)
        - (1. / 2.) * ((Y_aligned - X_xbeta_aligned) / sigma)**2
        - np.log(np.sqrt(2 * np.pi) * sigma))
    ll_obs_observed[~D] = 0
    ll_obs_notobserved = np.multiply(1 - D, norm.logcdf(-Z_zbeta_aligned))
    ll_obs = ll_obs_observed + ll_obs_notobserved

    # return log likelihood by observation vector
    return ll_obs
def integrateNormalDensity(lb, ub, mu=0, sigma=1):
    from scipy.stats import norm
    assert not (ub < lb)
    lessThanUpper = norm.logcdf(ub, loc=mu, scale=sigma)
    lessThanLower = norm.logcdf(lb, loc=mu, scale=sigma)
    # log(CDF(ub) - CDF(lb)), computed stably in log space via log1p
    return lessThanUpper + np.log1p(-math.exp(lessThanLower - lessThanUpper))
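# Usage sketch for integrateNormalDensity above (an illustration, assuming
# numpy and math are imported in its module): far in the lower tail the
# naive CDF difference underflows to 0.0, while the log-space version
# returns a finite log-probability.
import numpy as np
from scipy.stats import norm

naive = norm.cdf(-39.0) - norm.cdf(-40.0)      # underflows to 0.0
stable = integrateNormalDensity(-40.0, -39.0)  # finite, roughly -765.1
print(naive, stable)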
def constraint_indicator_func(x, indicator_how):
    if indicator_how == "Fermi_Dirac":
        return np.log(expit(x))  # log-sigmoid (Fermi-Dirac) indicator
    elif indicator_how == "normal":
        return norm.logcdf(x)  # probit indicator
    else:
        # fall back to the probit indicator for unrecognised options
        return norm.logcdf(x)
def _infer_probabilities(self, features, stdev):
    """
    Calculate the log-probabilities of the selected primitives from the
    polynomial features and their standard deviations, assuming the
    features are independent and normally distributed with mean equal to
    the coefficient itself and the given stdev.

    Args:
        features (ndarray): polynomial coefficients
        stdev (ndarray): standard deviation of the polynomial coefficients
    """
    # get the max of each feature over time and take a percentage of it;
    # pay attention to the second derivative
    deltas = (np.nanmax(np.absolute(features), axis=0) * self.delta
              + np.nanmin(np.absolute(features), axis=0))
    # convert regression coefficients to probabilities for the qualitative
    # state; features are normalised so that norm.logcdf can be used
    prob_p = norm.logcdf((features - deltas) / stdev)
    prob_n = norm.logcdf((-features - deltas) / stdev)
    prob_pn = norm.logcdf((deltas - features) / stdev)
    prob_0 = prob_pn + np.log1p(-np.exp(prob_n - prob_pn))
    params = {'pos': prob_p, 'neg': prob_n, 'zero': prob_0,
              'ignore': np.zeros(features.shape)}
    py = np.zeros((features.shape[0], self.prim_nr))
    # collect the right probability for each primitive and add them
    # together (assumes independent probabilities)
    for i, prim in enumerate(self.primitives):  # iterate over primitives
        # iterate over the signal and its 1st and 2nd derivatives, which is
        # the same as iterating over the polynomial parameters
        prob = [params[sign][:, j]
                for j, sign in enumerate(self.all_primitives[prim])
                if j < self.coeff_nr]
        py[:, i] = np.sum(prob, axis=0)
    return py
def calculate_log_joint_bernoulli_likelihood(latent_prob_samples: np.ndarray,
                                             outcomes: np.ndarray,
                                             link: str = "probit") -> float:
    # latent_prob_samples is an (n_samples, n_outcomes) array of latent
    # values on the link scale; outcomes is an (n_outcomes,) array of
    # binary outcomes (1 and 0)
    assert latent_prob_samples.shape[1] == outcomes.shape[0]
    # Make sure broadcasting is unambiguous
    assert latent_prob_samples.shape[0] != outcomes.shape[0]

    n_samples = latent_prob_samples.shape[0]

    # Get log likelihood for each draw
    assert link in ["logit", "probit"], "Only logit and probit links supported!"

    if link == "probit":
        individual_liks = np.sum(
            outcomes * norm.logcdf(latent_prob_samples)
            + (1 - outcomes) * norm.logcdf(-latent_prob_samples),
            axis=1,
        )
    else:
        individual_liks = np.sum(
            outcomes * np.log(expit(latent_prob_samples))
            + (1 - outcomes) * np.log(1 - expit(latent_prob_samples)),
            axis=1,
        )

    # Compute the Monte Carlo expectation
    return logsumexp(individual_liks - np.log(n_samples))
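# Usage sketch for the Monte Carlo marginal likelihood above (an
# illustration with made-up draws; the function's module is assumed to
# import norm, expit and logsumexp):
import numpy as np

rng = np.random.default_rng(0)
latents = rng.normal(size=(500, 3))  # 500 posterior draws, 3 outcomes
outcomes = np.array([1, 0, 1])
ll = calculate_log_joint_bernoulli_likelihood(latents, outcomes, link="probit")
print(ll)  # log of the draw-averaged joint Bernoulli likelihood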
def gaussianize_1d(X, pi, mu, sigma_sqr):
    mask_bound = 5e-8
    N, D = X.shape
    # for calculations please see: https://www.overleaf.com/6125358376rgmjjgdsmdmm
    scaled = (X.unsqueeze(-1) - mu) / sigma_sqr**0.5
    scaled = scaled.cpu()
    normal_cdf = to_tensor(norm.cdf(scaled))
    cdf = (pi * normal_cdf).sum(-1)
    log_cdfs = to_tensor(norm.logcdf(scaled))
    log_cdf = torch.logsumexp(torch.log(pi) + log_cdfs, dim=-1)
    log_sfs = to_tensor(norm.logcdf(-1 * scaled))
    log_sf = torch.logsumexp(torch.log(pi) + log_sfs, dim=-1)

    # Approximate inverse Gaussian CDF in the tails:
    # inv(CDF) ~  np.sqrt(-2 * np.log(1 - x))  # right tail, x -> 1
    # inv(CDF) ~ -np.sqrt(-2 * np.log(x))      # left tail,  x -> 0

    # 1) Step 1: invert the good CDF values
    cdf_mask = ((cdf > mask_bound) & (cdf < 1 - mask_bound)).double()
    # keep good CDF values, mask the bad ones to 0.5 (inverse(0.5) = 0)
    cdf_good = cdf * cdf_mask + 0.5 * (1. - cdf_mask)
    inverse_cdf = normal_distribution.icdf(cdf_good)

    # 2) Step 2: invert the BAD large CDF values
    cdf_mask_right = (cdf >= 1. - mask_bound).double()
    # keep large bad CDF values, mask the good and small bad values to 0
    cdf_bad_right_log = log_sf * cdf_mask_right
    inverse_cdf += torch.sqrt(-2. * cdf_bad_right_log)

    # 3) Step 3: invert the BAD small CDF values
    cdf_mask_left = (cdf <= mask_bound).double()
    # keep small bad CDF values, mask the good and large bad values to 1
    cdf_bad_left_log = log_cdf * cdf_mask_left
    inverse_cdf += -torch.sqrt(-2 * cdf_bad_left_log)

    if torch.isnan(inverse_cdf.max()) or torch.isnan(inverse_cdf.min()):
        print('inverse CDF: NaN.')
        pdb.set_trace()
    if torch.isinf(inverse_cdf.max()) or torch.isinf(inverse_cdf.min()):
        print('inverse CDF: Inf.')
        exit(0)

    # old simple (and possibly numerically unstable) way
    cdf2 = norm.cdf(scaled)
    # remove outliers
    cdf2[cdf2 < EPS] = EPS
    cdf2[cdf2 > 1 - EPS] = 1 - EPS
    new_distr = (pi.cpu().numpy() * cdf2).sum(-1)
    new_X = norm.ppf(new_distr)
    new_X = to_tensor(new_X)
    if False and torch.norm(new_X - inverse_cdf) > 10:
        print('Gaussianization 1D mismatch.')
        pdb.set_trace()
    # return inverse_cdf, cdf_mask, [log_cdf, cdf_mask_left], [log_sf, cdf_mask_right]
    return new_X, cdf_mask, [log_cdf, cdf_mask_left], [log_sf, cdf_mask_right]
def log_likelihood(self, smis, *, log_0=-1000.0, **targets):
    def _avoid_overflow(ll_):
        # log(exp(log(UP) - log(C)) - exp(log(LOW) - log(C))) + log(C)
        # where C = max(log(UP), log(LOW))
        ll_c = np.max(ll_)
        ll_ = np.log(np.exp(ll_[1] - ll_c) - np.exp(ll_[0] - ll_c)) + ll_c
        return ll_

    # self.update_targets(reset=False, **targets):
    for k, v in targets.items():
        if not isinstance(v, tuple) or len(v) != 2 or v[1] <= v[0]:
            raise ValueError('must be a tuple with (low, up) boundary')
        self.targets[k] = v
    if not self.targets:
        raise RuntimeError('<targets> is empty')

    if isinstance(smis, (pd.Series, pd.DataFrame)):
        ll = pd.DataFrame(np.full((len(smis), len(self._mdl)), log_0),
                          index=smis.index, columns=self._mdl.keys())
    else:
        ll = pd.DataFrame(np.full((len(smis), len(self._mdl)), log_0),
                          columns=self._mdl.keys())

    # 1. apply prediction on the given smis
    # 2. reset the returned index to [0, 1, ..., len(smis) - 1], consistent with ll's index
    # 3. drop all rows which have NaN value(s)
    pred = self.predict(smis).reset_index(drop=True).dropna(axis='index', how='any')

    # because pred only contains available data,
    # 'pred.index.values' should equal the index of the previous implementation
    idx = pred.index.values

    # calculate likelihood
    for k, (low, up) in self.targets.items():  # k: target; v: (low, up)
        # predict mean, std for all smiles
        mean, std = pred[k + ': mean'], pred[k + ': std']
        # calculate likelihood of the lower bound
        low_ll = norm.logcdf(low, loc=np.asarray(mean), scale=np.asarray(std))
        # calculate likelihood of the upper bound
        up_ll = norm.logcdf(up, loc=np.asarray(mean), scale=np.asarray(std))
        # zip low and up likelihoods into pairs, e.g.
        # [(tar_low_smi1, tar_up_smi1), ..., (tar_low_smiN, tar_up_smiN)]
        lls = zip(low_ll, up_ll)
        ll[k].iloc[idx] = np.array([*map(_avoid_overflow, list(lls))])

    return ll
def plot_decomposed_manhattan2(self, tissues=None, width=None,
                               components=None, save_path=None):
    if tissues is None:
        tissues = np.arange(self.dims['T'])
    else:
        tissues = np.arange(self.dims['T'])[np.isin(self.tissue_ids, tissues)]
    if components is None:
        components = np.arange(self.dims['K'])[np.any(self.active > 0.5, 0)]
    if width is None:
        width = int(np.sqrt(tissues.size)) + 1
        height = width
    else:
        height = int(tissues.size / width) + 1

    pred = ((self.active * self.weight_means) @ (self.X @ self.pi.T).T)
    # -log p from the two-sided normal tail: p = 2 * Phi(-|pred|)
    logp = -norm.logcdf(-np.abs(pred)) - np.log(2)
    pos = np.array([int(x.split('_')[1]) for x in self.snp_ids])

    W = self.active * self.weight_means
    c = (self.X @ self.pi.T)
    pred = self._compute_prediction()

    fig, ax = plt.subplots(height, width,
                           figsize=(width * 4, height * 3), sharey=False)
    ax = np.array(ax).flatten()
    for i, t in enumerate(tissues):
        ax[i].set_title('{}\nby component'.format(self.tissue_ids[t]))
        for k in components:
            predk = self._compute_prediction() - self._compute_prediction(k=k)
            logpk = -norm.logcdf(-np.abs(predk)) - np.log(2)
            if i == 0:
                ax[i].scatter(pos, logpk, marker='o', alpha=0.5,
                              label='k{}'.format(k))
            else:
                ax[i].scatter(pos, logpk, marker='o', alpha=0.5)
        ax[i].set_xlabel('SNP position')
    fig.legend()
    plt.tight_layout()
    if save_path is not None:
        plt.savefig(save_path)
    # plt.show()
    plt.close()
def discretized_normal_log(yvals, mean, sd):
    s = yvals[1] - yvals[0]  # distance between points in the y grid
    # log CDF at the midpoint between each grid point and its predecessor
    bot_dist = norm.logcdf(x=yvals - s / 2.0, loc=mean, scale=sd)
    # log CDF at the midpoint between each grid point and its successor
    top_dist = norm.logcdf(x=yvals + s / 2.0, loc=mean, scale=sd)
    # stable log-difference formula: log(CDF_top - CDF_bot)
    diff = top_dist + np.log(-np.expm1(bot_dist - top_dist))
    diff[0] = top_dist[0]  # first value should integrate from -inf
    # last value should integrate to +inf
    diff[-1] = norm.logsf(x=yvals[-1] - s / 2.0, loc=mean, scale=sd)
    return diff
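# Quick check of the discretization above (an illustrative sketch): the
# per-cell log-probabilities should sum to 1 in probability space, since
# the first and last cells absorb the tails.
import numpy as np
from scipy.special import logsumexp

grid = np.linspace(-5.0, 5.0, 101)
logp = discretized_normal_log(grid, mean=0.0, sd=1.0)
print(logsumexp(logp))  # ~0.0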
def ppl_acq_pi(self, pmout_samp, normal=True):
    """
    PPL-PI: PPL acquisition function algorithm for probability of
    improvement (PI).

    Parameters
    ----------
    pmout_samp : ndarray
        A numpy ndarray with shape=(nsamp,).
    normal : bool
        If true, assume pmout_samp are Gaussian distributed.

    Returns
    -------
    float
        PPL-PI acquisition function value.
    """
    youts = np.array(pmout_samp).flatten()
    nsamp = youts.shape[0]
    y_min = self.data.y.min()
    if normal:
        mu = np.mean(youts)
        sig = np.std(youts)
        if sig < 1e-6:
            sig = 1e-6
        piVal = -1 * norm.logcdf(y_min, loc=mu, scale=sig)
    else:
        piVal = -1 * len(np.argwhere(youts < y_min)) / float(nsamp)
    return piVal
def upper_bound_logpartition_41(tau, inv_alpha_1):
    tau_1, tau_2 = tau[:D + N], tau[D + N:]
    tau_1_N, tau_2_N = tau_1[D:], tau_2[D:]  # first D values correspond to w
    # assert len(tau_1_N) == len(tau_2_N) == 0
    alpha_1 = 1.0 / inv_alpha_1
    inv_alpha_2 = 1 - inv_alpha_1
    if np.any(tau_1 <= 0.01):
        # one of the tau_1 <= 0.01/inv_alpha_1: set integral_1 to INF
        integral_1 = INF2
    else:
        integral_1 = inv_alpha_1 * (-0.5 * ((D + N) * np.log(alpha_1) + np.sum(np.log(tau_1)))
                                    + np.sum(norm.logcdf(np.sqrt(alpha_1) * tau_2_N / np.sqrt(tau_1_N)))) \
            + 0.5 * np.sum(np.power(tau_2, 2) / tau_1) \
            + inv_alpha_1 * (N + D) * 0.5 * np.log(2 * np.pi)
    mat = A - np.diag(tau_1)
    sign, logdet = np.linalg.slogdet(mat)
    if (sign < 0) or np.isinf(logdet):
        # bad sign/logdet: set integral_2 to INF
        integral_2 = INF2
    else:
        try:
            integral_2 = inv_alpha_2 * (-0.5) * (-(D + N) * np.log(inv_alpha_2) + logdet) \
                + 0.5 * np.sum(tau_2 * np.linalg.solve(mat, tau_2))
        except np.linalg.LinAlgError:
            integral_2 = INF2
        integral_2 += inv_alpha_2 * (N + D) * 0.5 * np.log(2 * np.pi)
    integral = integral_1 + integral_2
    return integral
def likelihood_function(theta, y, yerr):
    # theta - the input values; x = frequencies, y = our fluxes from the
    # residual profile. All in log values.
    temperature, density, beta = theta  # free parameters for the MBB model, varied over the chain
    # the model is a modified blackbody with the varying free parameters
    # (frequencies, temperature, density, beta) given by the MCMC
    model = surface_brightness(frequencies, temperature, 10**density, beta)
    # synthetic photometry of the model is what we fit the data against
    model = synthetic_photometry(model, filter_array)

    # We need two likelihood functions: one for detections (value >= 3 sigma)
    # and one for non-detections (value < 3 sigma).

    # 1. Gaussian likelihood. Suitable for data with a well-defined value and
    # +/- uncertainty where we have a detection of 3 sigma or higher. Not
    # appropriate for values which are only upper limits.
    detection = np.where(y >= 3 * yerr)
    likelihood = (-0.5) * np.sum(
        ((y[detection] - model[detection])**2 / (yerr[detection]**2))
        + (np.log(2 * np.pi * (yerr[detection]**2))))

    # 2. Cumulative distribution function. For values where we only have an
    # upper limit (in our data, mostly 450um SCUBA-2). Used for data points
    # where value < 3 sigma; the 3 sigma upper limit becomes the data point.
    non_detection = np.where(y < 3 * yerr)
    likelihood = likelihood + np.sum(
        norm.logcdf(3 * yerr[non_detection], model[non_detection],
                    yerr[non_detection]))
    return likelihood
def plot_manhattan(self, component, thresh=0.0, save_path=None):
    """
    Make a manhattan plot for tissues, colored by the lead SNP of a
    component; include tissues with p(component active in tissue) > thresh.
    """
    logp = -norm.logcdf(-np.abs(self.Y)) - np.log(2)
    pos = np.array([int(x.split('_')[1]) for x in self.snp_ids])
    #sorted_tissues = np.flip(np.argsort(self.active[:, component]))
    #active_tissues = sorted_tissues[self.active[sorted_tissues, component] > thresh]
    active_tissues = np.arange(self.dims['T'])[self.active[:, component] > thresh]
    fig, ax = plt.subplots(1, active_tissues.size,
                           figsize=(5 * active_tissues.size, 4), sharey=True)
    for i, tissue in enumerate(active_tissues):
        lead_snp = self.pi.T[:, component].argmax()
        r2 = self.X[lead_snp]**2
        ax[i].scatter(pos, logp[tissue], c=r2, cmap='RdBu_r')
        ax[i].set_title('Tissue: {}\nLead SNP {}\nweight= {:.2f}, p={:.2f}'.format(
            self.tissue_ids[tissue], lead_snp,
            self.weight_means[tissue, component],
            self.active[tissue, component]))
        ax[i].set_xlabel('SNP')
    ax[0].set_ylabel('-log(p)')
    if save_path is not None:
        plt.savefig(save_path)
    # plt.show()
    plt.close()
def _setup(self):
    super(MinValueEntropySearch, self)._setup()

    # Apply Gumbel sampling
    m = self.models[0]
    valid = self.feasible_data_index()

    # Work with feasible data
    X = self.data[0][valid, :]
    N = np.shape(X)[0]
    Xrand = RandomDesign(self.gridsize, self._domain).generate()
    fmean, fvar = m.predict_f(np.vstack((X, Xrand)))
    idx = np.argmin(fmean[:N])
    right = fmean[idx].flatten()  # + 2*np.sqrt(fvar[idx]).flatten()
    left = right
    probf = lambda x: np.exp(np.sum(norm.logcdf(-(x - fmean) / np.sqrt(fvar)), axis=0))

    i = 0
    while probf(left) < 0.75:
        left = 2. ** i * np.min(fmean - 5. * np.sqrt(fvar)) + (1. - 2. ** i) * right
        i += 1

    # Binary search for 3 percentiles
    q1, med, q2 = map(lambda val: bisect(lambda x: probf(x) - val, left, right,
                                         maxiter=10000, xtol=0.01),
                      [0.25, 0.5, 0.75])
    beta = (q1 - q2) / (np.log(np.log(4. / 3.)) - np.log(np.log(4.)))
    alpha = med + beta * np.log(np.log(2.))

    # obtain samples of y* from the fitted Gumbel distribution
    mins = -np.log(-np.log(np.random.rand(self.num_samples).astype(np_float_type))) * beta + alpha
    self.samples.set_data(mins)
def _hammer_function(self, x, x0, r_x0, s_x0):
    '''
    Creates the function to define the exclusion zones
    '''
    dist = np.sqrt(np.square(np.atleast_2d(x)[:, None, :]
                             - np.atleast_2d(x0)[None, :, :]).sum(-1))
    return norm.logcdf((dist - r_x0) / s_x0)
def calculate_p(z_scores: np.array) -> np.array:
    """
    Function that calculates P for the MAMA results

    :param z_scores: Z scores

    :return: P values for MAMA (as strings, to allow for very large negative exponents)
    """
    # Since P = 2 * normal_cdf(-|Z|), P = e ^ (log_normal_cdf(-|Z|) + ln 2)
    # This can be changed to base 10 as P = 10 ^ ((log_normal_cdf(-|Z|) + ln 2) / ln 10)
    log_10_p = RECIP_LN_10 * (norm.logcdf(-np.abs(z_scores)) + LN_2)

    # Break the log base 10 of the P values into integer and fractional parts
    # To handle the case of Z = 0 (and not produce "10e-1"), set initial values to (-1.0, 1.0)
    frac_part, int_part = np.full_like(z_scores, -1.0), np.full_like(z_scores, 1.0)
    np.modf(log_10_p, out=(frac_part, int_part), where=(z_scores != 0.0))

    # Construct strings for the P values:
    # 1) Add one to the fractional part so the resulting mantissa is between 1 and 10
    # 2) Subtract one from the integer part to compensate and keep the overall value correct
    result = np.char.add(
        np.char.add(np.power(10.0, (frac_part + 1.0)).astype(str), 'e'),
        (int_part - 1).astype(int).astype(str))

    return result
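# Usage sketch for calculate_p above (an illustration; RECIP_LN_10 and LN_2
# are module constants, reconstructed here from their names):
import numpy as np
from scipy.stats import norm

LN_2 = np.log(2.0)
RECIP_LN_10 = 1.0 / np.log(10.0)

print(calculate_p(np.array([0.0, 1.96, 40.0])))
# z = 40 gives roughly 7.3e-350: far below float underflow, but preserved
# as a string because the mantissa and exponent are built separately.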
def q2qnbinom(counts, input_mean, output_mean, dispersion):
    """ Quantile-to-quantile mapping for a negative binomial """
    zero = logical_or(input_mean < 1e-14, output_mean < 1e-14)
    input_mean[zero] = input_mean[zero] + 0.25
    output_mean[zero] = output_mean[zero] + 0.25
    ri = 1 + multiply(np.matrix(dispersion).T, input_mean)
    vi = multiply(input_mean, ri)
    rO = 1 + multiply(np.matrix(dispersion).T, output_mean)
    vO = multiply(output_mean, rO)
    i = counts >= input_mean
    low = logical_not(i)
    p1 = empty(counts.shape, dtype=np.float64)
    p2 = p1.copy()
    q1, q2 = p1.copy(), p1.copy()
    if i.any():
        p1[i] = norm.logsf(counts[i], loc=input_mean[i], scale=np.sqrt(vi[i]))[0, :]
        p2[i] = gamma.logsf(counts[i], (input_mean / ri)[i], scale=ri[i])[0, :]
        q1[i] = norm.ppf(1 - np.exp(p1[i]), output_mean[i], np.sqrt(vO[i]))[0, :]
        q2[i] = gamma.ppf(1 - np.exp(p2[i]), np.divide(output_mean[i], rO[i]),
                          scale=rO[i])[0, :]
    if low.any():
        p1[low] = norm.logcdf(counts[low], loc=input_mean[low], scale=np.sqrt(vi[low]))[0, :]
        p2[low] = gamma.logcdf(counts[low], input_mean[low] / ri[low], scale=ri[low])[0, :]
        q1[low] = norm.ppf(np.exp(p1[low]), loc=output_mean[low], scale=np.sqrt(vO[low]))[0, :]
        q2[low] = gamma.ppf(np.exp(p2[low]), output_mean[low] / rO[low], scale=rO[low])[0, :]
    return (q1 + q2) / 2
def marginal_pdf_distance(r, rmin, rmax, mu, sigma, mlim):
    """
    Calculate the expected marginal distribution of distances given the
    parallax survey parameters. The calculation is only approximate, as the
    magnitude limit is applied to the error-free true apparent magnitude.

    Parameters
    ----------
    r : float vector
        Values of r for which to calculate p(r).
    rmin : float
        Minimum distance in survey.
    rmax : float
        Maximum distance in survey.
    mu : float
        Mean of the true absolute magnitude distribution.
    sigma : float
        Standard deviation of the true absolute magnitude distribution.
    mlim : float
        Apparent magnitude limit of the survey.

    Returns
    -------
    p(r) as float vector.
    """
    A = rmax**3 - rmin**3
    pdf = lambda x: np.exp(np.log(3) - np.log(A) + 2 * np.log(x)
                           + norm.logcdf(mlim - mu - 5 * np.log10(x) + 5, scale=sigma))
    C, dummy = quad(pdf, rmin, rmax)
    return pdf(r) / C
def __init__(self, K, Y, init=None, threshold=1e-9):
    N = np.shape(K)[0]
    f = np.zeros((N, 1))
    converged = False
    k = 0
    innerC = 0
    for i in range(N):
        pdfDiff = norm.logpdf(f) - norm.logcdf(Y * f)
        W = np.exp(2 * pdfDiff) + Y * f * np.exp(pdfDiff)
        Wsqrt = np.sqrt(W)
        Wdiag = np.diag(Wsqrt.flatten())
        B = np.identity(N) + np.dot(Wdiag, np.dot(K, Wdiag))
        grad = Y * np.exp(pdfDiff)
        b = W * f + grad
        interim = np.dot(Wdiag, np.dot(K, b))
        cgRes = Cg(B, interim, threshold=threshold)
        s1 = cgRes.result
        innerC = innerC + cgRes.iterations
        a = b - Wsqrt * s1
        if converged:
            break
        f_prev = f
        f = np.dot(K, a)
        diff = f - f_prev
        if np.dot(diff.T, diff).flatten() < threshold * N or innerC > 15000:
            converged = True
        k = k + 1
    self.result = f
    self.iterations = k + innerC
def upper_bound_logpartition(tau, inv_alpha_1):
    tau_1, tau_2 = tau[:D + N], tau[D + N:]
    tau_1_N, tau_2_N = tau_1[D:], tau_2[D:]  # first D values correspond to w
    alpha_1 = 1.0 / inv_alpha_1
    inv_alpha_2 = 1 - inv_alpha_1
    # if np.any(tau_1 <= 0.01): ...
    if np.any(tau_1 <= 0):
        # one of the tau_1 <= 0: set integral_1 to INF
        # if np.any(tau_1 <= 0) or np.any(tau_1 > min_eigvals_A):
        integral_1 = INF2
    else:
        integral_1 = inv_alpha_1 * (-0.5 * ((D + N) * np.log(alpha_1) + np.sum(np.log(tau_1)))
                                    + np.sum(norm.logcdf(np.sqrt(alpha_1) * tau_2_N / np.sqrt(tau_1_N)))) \
            + 0.5 * np.sum(np.power(tau_2, 2) / tau_1)
    mat = A - np.diag(tau_1)
    sign, logdet = np.linalg.slogdet(mat)
    if (sign <= 0) or np.isinf(logdet):
        print('sign = %s, logdet = %s, setting integral_2 to INF' % (sign, logdet))
        integral_2 = INF2
    else:
        try:
            integral_2 = -0.5 * inv_alpha_2 * (-(D + N) * np.log(inv_alpha_2) + logdet) \
                + 0.5 * np.sum(tau_2 * np.linalg.solve(mat, tau_2))
        except np.linalg.LinAlgError:
            integral_2 = INF2
    integral = integral_1 + integral_2
    return integral
def upper_bound_logpartition(tau, inv_alpha_1):
    tau_1, tau_2 = tau[:D + N], tau[D + N:]
    tau_1_N, tau_2_N = tau_1[D:], tau_2[D:]  # first D values correspond to w
    alpha_1 = 1.0 / inv_alpha_1
    inv_alpha_2 = 1 - inv_alpha_1
    if np.any(tau_1 <= 0):
        # if np.any(tau_1 <= 0) or np.any(tau_1 > min_eigvals_A):
        print('one of the tau_1 <= 0: setting integral_1 to INF')
        integral_1 = INF2
    else:
        integral_1 = inv_alpha_1 * (-0.5 * ((D + N) * np.log(alpha_1) + np.sum(np.log(tau_1)))
                                    + np.sum(norm.logcdf(np.sqrt(alpha_1) * tau_2_N / np.sqrt(tau_1_N)))) \
            + 0.5 * np.sum(np.power(tau_2, 2) / tau_1)
    mat = A - np.diag(tau_1)
    sign, logdet = np.linalg.slogdet(mat)
    if (sign <= 0) or np.isinf(logdet):
        print('sign = %s, logdet = %s, setting integral_2 to INF' % (sign, logdet))
        integral_2 = INF2
    else:
        try:
            integral_2 = -0.5 * inv_alpha_2 * ((D + N) * np.log(inv_alpha_2) + logdet) \
                + 0.5 * np.sum(tau_2 * np.linalg.solve(mat, tau_2))
        except np.linalg.LinAlgError:
            integral_2 = INF2
    integral = integral_1 + integral_2
    return integral
def predict(self, Y, X, parameter_sample):
    """Given a sample of (X, Y) as well as a sample of network parameters,
    compute p_theta(Y|X) and compare against the actual values of Y."""
    # first compute coefficients * X; shape (n, B)
    inner_products = np.matmul(X, np.transpose(parameter_sample))
    # second, use the normal CDF to map into [0, 1] (in log space)
    log_probs = norm.logcdf(inner_products)
    # (thresholding np.exp(log_probs) at 0.5 would give hard predictions)
    # use the probabilities for the absolute-error computation
    ae = np.abs(np.exp(log_probs) - Y[:, np.newaxis])
    # compute cross-entropy
    cross_entropy = -(log_probs * Y[:, np.newaxis]
                      + np.logaddexp(0, -log_probs) * (1.0 - Y[:, np.newaxis]))
    return (log_probs, ae, cross_entropy)
def MES(f_x: NDArray[float], pred_mu: NDArray[float],
        pred_var: NDArray[float], k: int) -> float:
    y_star = f_x.max(axis=0)
    y_sample = np.tile(y_star, (pred_mu.shape[0], 1))
    gamma_y = (y_sample.T - pred_mu) / np.sqrt(pred_var)  # gamma_y has shape (D, K)
    print(gamma_y)
    print(np.mean(gamma_y, axis=1))
    print(np.var(gamma_y, axis=1))

    fig = plt.figure(figsize=(20, 10))
    ax1 = fig.add_subplot(2, 2, 1)
    plt.title("distribution")
    ax1.plot(X.ravel(), y_sample.T.ravel(), "g", label="y_star")
    ax1.plot(X.ravel(), pred_mu, "b", label="pred_mu")
    ax1.plot(X.ravel(), (y_sample.T - pred_mu.ravel()).ravel(), "r",
             label="y_star-pred_mu")
    ax1.legend(loc="lower left", prop={'size': 8})
    ax2 = fig.add_subplot(2, 2, 2)
    ax2.plot(X.ravel(), gamma_y.ravel(), "r", label="gamma_y")
    ax2.legend(loc="upper left", prop={'size': 8})
    ax3 = fig.add_subplot(2, 2, 3)
    ax3.plot(X.ravel(), np.sqrt(pred_var), "g", label="sqrt(pred_var)")
    ax3.legend(loc="lower left", prop={'size': 8})
    plt.savefig(result_dir_path + savefig_pass + str(seed) + "/gamma_y" + ".pdf")
    plt.close()

    psi_gamma = norm.pdf(gamma_y, loc=pred_mu, scale=np.sqrt(pred_var))
    fig = plt.figure(figsize=(20, 10))
    ax1 = fig.add_subplot(2, 2, 1)
    plt.title("distribution")
    ax1.plot(X.ravel(), gamma_y.ravel(), "r", label="gamma_y")
    ax1.legend(loc="upper left", prop={'size': 8})
    ax2 = fig.add_subplot(2, 2, 2)
    ax2.plot(X.ravel(), psi_gamma.ravel(), "r", label="psi_gamma")
    ax2.legend(loc="upper left", prop={'size': 8})
    large_psi_gamma = norm.cdf(gamma_y, loc=pred_mu, scale=np.sqrt(pred_var))
    ax3 = fig.add_subplot(2, 2, 3)
    ax3.plot(X.ravel(), large_psi_gamma.ravel(), "r", label="large_psi_gamma")
    ax3.legend(loc="lower left", prop={'size': 8})
    ax4 = fig.add_subplot(2, 2, 4)
    log_large_psi_gamma = norm.logcdf(gamma_y, loc=pred_mu, scale=np.sqrt(pred_var))
    ax4.plot(X.ravel(), log_large_psi_gamma.ravel(), "r",
             label="log_large_psi_gamma")
    ax4.legend(loc="lower left", prop={'size': 8})

    # MES acquisition: average over the y* samples of
    # gamma * pdf(gamma) / (2 * cdf(gamma)) - log cdf(gamma)
    temp = np.divide(gamma_y * psi_gamma, 2 * large_psi_gamma,
                     out=np.zeros_like(gamma_y * psi_gamma),
                     where=(2 * large_psi_gamma) != 0) - log_large_psi_gamma
    alpha = np.sum(temp, axis=0) / k
    plt.savefig(result_dir_path + savefig_pass + str(seed) + "/distribution_" + ".pdf")
    plt.close()
    return alpha
def sep_logpdf(x, mu=0., sigma=1., nu=0, tau=2):
    z = (x - mu) / sigma
    w = np.sign(z) * np.abs(z)**(tau / 2) * nu * np.sqrt(2. / tau)
    # Note: there is a division by sigma in the paper
    logp = np.log(2) + norm.logcdf(w) + ep2_logpdf(x, mu, sigma, tau)
    return logp
def log_probability_via_sampling(means: np.ndarray, stdevs: np.ndarray,
                                 n_draws: int) -> np.ndarray:
    # TODO: Currently expects means and stdevs to be 1D. Maybe could do n-d.
    # TODO: This could really do with the odd unit test.
    draws = np.random.normal(means, stdevs, size=(n_draws, means.shape[0]))

    # average the probit probabilities across draws with the logsumexp trick
    pre_factor = -np.log(n_draws)
    presence_log_probs = norm.logcdf(draws)
    absence_log_probs = norm.logcdf(-draws)

    presence_results = logsumexp(pre_factor + presence_log_probs, axis=0)
    absence_results = logsumexp(pre_factor + absence_log_probs, axis=0)

    return np.stack([absence_results, presence_results], axis=1)
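# Usage sketch for log_probability_via_sampling above (an illustration;
# the function's module is assumed to import norm and logsumexp). Each
# row's two columns are log P(absence) and log P(presence), so they sum
# to ~1 in probability space:
import numpy as np

np.random.seed(0)
means = np.array([-1.0, 0.0, 2.0])
stdevs = np.array([0.5, 1.0, 0.3])
log_probs = log_probability_via_sampling(means, stdevs, n_draws=10_000)
print(np.exp(log_probs).sum(axis=1))  # ~[1., 1., 1.]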
def _logcdf(self, x, stddev_ratio):
    norm_arg = self._norm_pdf_arg(x, stddev_ratio)
    return numpy.where(
        x < 0,
        numpy.log(2.0 / (stddev_ratio + 1)) + norm.logcdf(norm_arg),
        numpy.log((1.0 + stddev_ratio * (2.0 * norm.cdf(norm_arg) - 1.0))
                  / (stddev_ratio + 1)))
def log_likelihood(self, smis, **targets):
    def _avoid_overflow(ll_):
        # log(exp(log(UP) - log(C)) - exp(log(LOW) - log(C))) + log(C)
        # where C = max(log(UP), log(LOW))
        ll_c = np.max(ll_)
        ll_ = np.log(np.exp(ll_[1] - ll_c) - np.exp(ll_[0] - ll_c)) + ll_c
        return ll_

    # self.update_targets(reset=False, **targets):
    for k, v in targets.items():
        if not isinstance(v, tuple) or len(v) != 2 or v[1] <= v[0]:
            raise ValueError('must be a tuple with (low, up) boundary')
        self._targets[k] = v
    if not self._targets:
        raise RuntimeError('<targets> is empty')

    ll = pd.DataFrame(np.full((len(smis), len(self._mdl)), -1000.0),
                      columns=self._mdl.keys())
    pred = self.predict(smis).reset_index(drop=True)
    tmp = pred.isna().any(axis=1)
    idx = [i for i in range(len(smis)) if ~tmp[i]]

    # calculate likelihood
    for k, (low, up) in self._targets.items():  # k: target; v: (low, up)
        # predict mean, std for all smiles
        mean, std = pred[k + ': mean'], pred[k + ': std']
        # calculate likelihood of the lower bound
        low_ll = norm.logcdf(low, loc=np.asarray(mean), scale=np.asarray(std))
        # calculate likelihood of the upper bound
        up_ll = norm.logcdf(up, loc=np.asarray(mean), scale=np.asarray(std))
        # zip low and up likelihoods into pairs, e.g.
        # [(tar_low_smi1, tar_up_smi1), ..., (tar_low_smiN, tar_up_smiN)]
        lls = zip(low_ll, up_ll)
        ll[k].iloc[idx] = np.array([*map(_avoid_overflow, list(lls))])

    return ll
def loglik(mu):
    ll = -sum(norm.logpdf(values[not_censored], loc=mu, scale=std_est))
    if n_left_cens > 0:
        ll -= sum(norm.logcdf(values[left_censored], loc=mu, scale=std_est))
    if n_right_cens > 0:
        ll -= sum(norm.logsf(values[right_censored], loc=mu, scale=std_est))
    return ll
def upper_bound_logpartition_43(tau, inv_alpha_1):
    tau_1, tau_2 = tau[:D + N], tau[D + N:]
    tau_1_N, tau_2_N = tau_1[D:], tau_2[D:]  # first D values correspond to w
    alpha_1 = 1.0 / inv_alpha_1
    inv_alpha_2 = 1 - inv_alpha_1
    alpha_2 = 1. / inv_alpha_2
    use_exact_integral_1 = True
    use_exact_integral_2 = True
    if use_exact_integral_1:
        if np.any(tau_1 <= 0.01):
            # one of the tau_1 <= 0.01/inv_alpha_1: set integral_1 to INF
            integral_1 = INF2
        else:
            integral_1 = inv_alpha_1 * (-0.5 * ((D + N) * np.log(alpha_1) + np.sum(np.log(tau_1)))
                                        + np.sum(norm.logcdf(np.sqrt(alpha_1) * tau_2_N / np.sqrt(tau_1_N)))) \
                + 0.5 * np.sum(np.power(tau_2, 2) / tau_1) \
                + inv_alpha_1 * (N + D) * 0.5 * np.log(2 * np.pi)
    else:
        L = np.ones(N) * 0.01  # to avoid integrating the step function over the reals
        ints = np.zeros(N)
        tau_1_mod = alpha_1 * tau_1
        tau_2_mod = alpha_1 * tau_2
        for i in range(N):
            if L[i] / inv_alpha_1 < tau_1_mod[i]:  # numerical check that the integral is finite
                ints[i] = np.log(quad(
                    lambda t: np.exp(log_step_func(t) / inv_alpha_1
                                     - 0.5 * tau_1_mod[i] * np.power(t, 2)
                                     + tau_2_mod[i] * t),
                    -np.inf, np.inf)[0])
            else:
                ints[i] = np.inf
                break
        integral_1 = inv_alpha_1 * np.sum(ints)
    if use_exact_integral_2:
        mat = A - np.diag(tau_1)
        sign, logdet = np.linalg.slogdet(mat)
        if (sign < 0) or np.isinf(logdet):
            # bad sign/logdet: set integral_2 to INF
            integral_2 = INF2
        else:
            try:
                integral_2 = inv_alpha_2 * (-0.5) * (-(D + N) * np.log(inv_alpha_2) + logdet) \
                    + 0.5 * np.sum(tau_2 * np.linalg.solve(mat, tau_2))
            except np.linalg.LinAlgError:
                integral_2 = INF2
            integral_2 += inv_alpha_2 * (N + D) * 0.5 * np.log(2 * np.pi)
    else:
        integral_2 = inv_alpha_2 * gauss_integral(alpha_2 * (A - np.diag(tau_1)),
                                                  -alpha_2 * tau_2)
    integral = integral_1 + integral_2
    return integral
def log_likelihood(self, smis, **targets):
    def _avoid_overflow(ll_):
        # log(exp(log(UP) - log(C)) - exp(log(LOW) - log(C))) + log(C)
        # where C = max(log(UP), log(LOW))
        ll_c = np.max(ll_)
        ll_ = np.log(np.exp(ll_[1] - ll_c) - np.exp(ll_[0] - ll_c)) + ll_c
        return ll_

    ll = np.repeat(-1000.0, len(smis))
    tar_fps = self._descriptor.transform(smis)
    tar_fps = pd.DataFrame(data=tar_fps).reset_index(drop=True)
    tmp = tar_fps.isna().any(axis=1)
    idx = [i for i in range(len(smis)) if ~tmp[i]]
    tar_fps.dropna(inplace=True)

    # calculate likelihood
    ll_mat = []
    for k, (low, up) in targets.items():  # k: target; v: (low, up)
        # predict mean, std for all smiles
        mean, std = self._mdl[k].predict(tar_fps, return_std=True)
        # calculate likelihood of the lower bound
        low_ll = norm.logcdf(low, loc=np.asarray(mean), scale=np.asarray(std))
        # calculate likelihood of the upper bound
        up_ll = norm.logcdf(up, loc=np.asarray(mean), scale=np.asarray(std))
        # zip low and up likelihoods into pairs, e.g.
        # [(tar_low_smi1, tar_up_smi1), ..., (tar_low_smiN, tar_up_smiN)]
        lls = zip(low_ll, up_ll)
        ll_mat.append(list(lls))

    # sum all log-likelihoods along each smiles:
    # ll_sum = [[sum_low_smi1, sum_up_smi1], ..., [sum_low_smiN, sum_up_smiN]]
    ll_sum = np.sum(np.array(ll_mat), axis=0)
    tmp = np.array([*map(_avoid_overflow, ll_sum)])
    np.put(ll, idx, tmp)
    return ll
def _get_f_map(self):
    """Computes the maximum a posteriori (MAP) estimate of f given the data
    using Newton's method.

    Returns:
        MAP of the Gaussian process values at the current datapoints
    """
    converged = False
    try_no = 0
    f_map = None
    # Newton's method to approximate f_MAP
    while not converged and try_no < 1:
        # randomly initialise f_map
        f_map = self.random_state.uniform(0., 1., self.datapoints.shape[0])
        for m in range(100):
            # compute Z
            f_sup = np.array([f_map[self.comparisons[i, 0]]
                              for i in range(self.comparisons.shape[0])])
            f_inf = np.array([f_map[self.comparisons[i, 1]]
                              for i in range(self.comparisons.shape[0])])
            Z = self._get_Z(f_sup, f_inf)
            Z_logpdf = norm.logpdf(Z)
            Z_logcdf = norm.logcdf(Z)
            # compute b
            b = self._get_b(Z_logpdf, Z_logcdf)
            # compute gradient g
            g = self._get_g(f_map, b)
            # compute Hessian H
            C = self._get_C(Z)
            H = -self.K_inv + C
            H_inv = self._get_inv(H)
            # perform update
            update = np.dot(H_inv, g)
            f_map -= update
            # stop criterion
            if np.linalg.norm(update) < 0.0001:
                converged = True
                break
        if not converged:
            print("Did not converge.")
            try_no += 1
    return f_map
def log_target(be, x, constraints_funcs):
    """
    Sum of log Phi(be * g(x)) over the constraint functions g.

    Parameters
    ----------
    be : float
        The beta scaling applied to each constraint value.
    x : array-like
        The point at which the constraints are evaluated.
    constraints_funcs : iterable of callables
        The constraint functions g.
    """
    return np.sum([norm.logcdf(be * g(x)) for g in constraints_funcs])
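# Usage sketch for log_target above (an illustration; the constraint
# functions here are hypothetical):
import numpy as np
from scipy.stats import norm

constraints = [lambda x: x[0] + x[1] - 1.0, lambda x: -x[0]]
point = np.array([0.25, 1.5])
print(log_target(2.0, point, constraints))
# = norm.logcdf(2 * 0.75) + norm.logcdf(2 * -0.25)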
def impl(self, logEI, glogEI, diff, sigma):
    z = diff / sigma
    if z < 34:
        logcdf = norm.logcdf(-z, 0, 1)
        ddiff = -np.exp(logcdf - logEI)  # aka: -cdf / EI
    else:
        # linear tail approximation of the derivative for large z
        foo = 2 * .49903031
        dz = (-0.12442506 - foo * z)
        ddiff = dz / sigma
    return ddiff * glogEI
def evaluate(self, x: np.ndarray) -> np.ndarray:
    """
    Evaluates the penalization function value
    """
    if self.x_batch is None:
        return np.ones((x.shape[0], 1))

    distances = _distance_calculation(x, self.x_batch)
    normalized_distance = (distances - self.radius) / self.scale
    return norm.logcdf(normalized_distance).sum(axis=1, keepdims=True)
def evaluate_with_gradients(self, x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Evaluates the penalization function value and gradients with respect to x
    """
    if self.x_batch is None:
        return np.ones((x.shape[0], 1)), np.zeros(x.shape)

    distances, d_dist_dx = _distance_with_gradient(x, self.x_batch)
    normalized_distance = (distances - self.radius) / self.scale
    h_func = norm.cdf(normalized_distance)
    d_value_dx = 0.5 * (1 / h_func[:, :, None]) \
        * norm.pdf(normalized_distance)[:, :, None] \
        * d_dist_dx / self.scale[None, :, None]
    return norm.logcdf(normalized_distance).sum(1, keepdims=True), d_value_dx.sum(1)
def perform(self, node, inputs, output_storage):
    logEI, gEI, diff, sigma = inputs
    z = diff / sigma
    logcdf = norm.logcdf(-z, 0, 1)
    logpdf = norm.logpdf(-z, 0, 1)
    dz = -np.exp(logcdf - logEI)     # aka: -cdf / EI
    dsigma = np.exp(logpdf - logEI)  # aka: pdf / EI
    # switch to a linear tail approximation where z is too large for the
    # log-space differences to be reliable
    foo = 2 * .49903031
    dz[z > 34] = -0.12442506 - foo * z[z > 34]
    dsigma[z > 34] = dz[z > 34] * (-z[z > 34] / sigma[z > 34])
    dz[z > 34] /= sigma[z > 34]
    output_storage[0][0] = dz * gEI
    output_storage[1][0] = dsigma * gEI
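# Sanity check of the tail constants used above (an illustrative sketch):
# for large z, d/dz log Phi(-z) = -pdf(z) / Phi(-z) ~ -(z + 1/z), and the
# linear surrogate -0.12442506 - 2*0.49903031*z tracks it closely at the
# z = 34 switchover.
import numpy as np
from scipy.stats import norm

z = 34.0
exact = -np.exp(norm.logpdf(-z) - norm.logcdf(-z))
approx = -0.12442506 - 2 * 0.49903031 * z
print(exact, approx)  # roughly -34.03 vs -34.06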
def pnorm(x, mean=0, sd=1, lowertail=True, log=False):
    """
    ============================================================================
    pnorm()
    ============================================================================
    The cumulative distribution function for the normal distribution.

    You provide a value along the normal distribution (eg x=3) or an array of
    values, and it returns what proportion of values lie below it (the
    quantile). Alternatively, if you select lowertail=False, it returns the
    proportion of values that are above it.

    USAGE:
    cnorm(mean=0, sd=1, type="equal", conf=0.95)
    dnorm(x, mean=0, sd=1, log=False)
    pnorm(q, mean=0, sd=1, lowertail=True, log=False)
    qnorm(p, mean=0, sd=1, lowertail=True, log=False)
    rnorm(n=1, mean=0, sd=1)

    :param x (float, array of floats): The values along the distribution.
    :param mean (float):     mean of the distribution
    :param sd (float):       standard deviation
    :param lowertail (bool): are you interested in what proportion of values
                             lie beneath x? or above x (False)?
    :param log (bool):       return the log of the probability?
    :return: an array of quantiles corresponding to the values in x
    ============================================================================
    """
    if lowertail and not log:
        return norm.cdf(x, loc=mean, scale=sd)
    elif not lowertail and not log:
        return norm.sf(x, loc=mean, scale=sd)
    elif lowertail and log:
        return norm.logcdf(x, loc=mean, scale=sd)
    else:
        return norm.logsf(x, loc=mean, scale=sd)
def lnlike_limit(theta, x_limit, y_limit, yerr_limit=0.1):
    """Log-likelihood for non-detections (upper limits)."""
    m, b = theta
    model = m * x_limit + b
    return np.sum(norm.logcdf(model - y_limit, scale=yerr_limit))
def compute(self, X, derivative=False, **kwargs):
    """
    A call to the object returns the log(EI) and derivative values.

    :param X: The point at which the function is to be evaluated.
    :type X: np.ndarray (1,D)
    :param derivative: This controls whether the derivative is to be returned.
    :type derivative: Boolean
    :return: The value of log(EI)
    :rtype: np.ndarray(1, 1)
    :raises BayesianOptimizationError: if X.shape[0] > 1. Only a single X
        can be evaluated.
    """
    if derivative:
        print("LogEI does not support derivative calculation yet")
        return

    if np.any(X < self.X_lower) or np.any(X > self.X_upper):
        return np.array([[-np.finfo(float).max]])

    m, v = self.model.predict(X)

    incumbent, _ = self.compute_incumbent(self.model)
    eta, _ = self.model.predict(np.array([incumbent]))

    f_min = eta - self.par

    s = np.sqrt(v)
    z = (f_min - m) / s

    log_ei = np.zeros((m.size, 1))
    for i in range(0, m.size):
        mu, sigma = m[i], s[i]
        # Degenerate case 1: first term vanishes
        if np.any(abs(f_min - mu)) == 0:
            if sigma > 0:
                log_ei[i] = np.log(sigma) + norm.logpdf(z[i])
            else:
                log_ei[i] = -np.inf
        # Degenerate case 2: second term vanishes and the first term
        # has a special form
        elif sigma == 0:
            if mu < np.any(f_min):
                log_ei[i] = np.log(f_min - mu)
            else:
                log_ei[i] = -np.inf
        # Normal case
        else:
            b = np.log(sigma) + norm.logpdf(z[i])
            # log(y+z) is tricky, we distinguish two cases:
            if np.any(f_min > mu):
                # When y>0, z>0, we define a=ln(y), b=ln(z).
                # Then y+z = exp[ max(a,b) + ln(1 + exp(-|b-a|)) ],
                # and thus log(y+z) = max(a,b) + ln(1 + exp(-|b-a|))
                a = np.log(f_min - mu) + norm.logcdf(z[i])
                log_ei[i] = max(a, b) + np.log(1 + np.exp(-abs(b - a)))
            else:
                # When y<0, z>0, we define a=ln(-y), b=ln(z), and it has to
                # be true that b >= a in order to satisfy y+z>=0.
                # Then y+z = exp(b) - exp(a) = exp[ b + ln(1 - exp(a-b)) ],
                # and thus log(y+z) = b + ln(1 - exp(a-b))
                a = np.log(mu - f_min) + norm.logcdf(z[i])
                if a >= b:
                    # a > b can only happen due to numerical inaccuracies
                    # or approximation errors
                    log_ei[i] = -np.inf
                else:
                    log_ei[i] = b + np.log(1 - np.exp(a - b))
    return log_ei
def _pln_logpdf(x, alpha, nu, tau2):
    return (np.log(alpha) + alpha * nu + alpha * tau2 / 2
            - (alpha + 1) * np.log(x)
            + norm.logcdf((np.log(x) - nu - alpha * tau2) / np.sqrt(tau2)))
def log_cdf(self, s):
    return norm.logcdf(s, loc=self.mu, scale=self.std)
def compute(self, X, derivative=False, **kwargs):
    """
    Computes the Log EI value and its derivatives.

    Parameters
    ----------
    X: np.ndarray(1, D)
        The input point where the acquisition function should be evaluated.
        The dimensionality of X is (N, D), with N as the number of points
        to evaluate at and D is the number of dimensions of one X.
    derivative: Boolean
        If set to true, the derivative of the acquisition function at X is
        returned as well. Not implemented yet!

    Returns
    -------
    np.ndarray(1,1)
        Log Expected Improvement of X
    np.ndarray(1,D)
        Derivative of Log Expected Improvement at X (only if derivative=True)
    """
    if derivative:
        logger.error("LogEI does not support derivative calculation yet")
        return

    if np.any(X < self.X_lower) or np.any(X > self.X_upper):
        return np.array([[-np.finfo(float).max]])

    m, v = self.model.predict(X)

    _, eta = self.rec.estimate_incumbent(None)

    f_min = eta - self.par

    s = np.sqrt(v)
    z = (f_min - m) / s

    log_ei = np.zeros((m.size, 1))
    for i in range(0, m.size):
        mu, sigma = m[i], s[i]
        # Degenerate case 1: first term vanishes
        if np.any(abs(f_min - mu)) == 0:
            if sigma > 0:
                log_ei[i] = np.log(sigma) + norm.logpdf(z[i])
            else:
                log_ei[i] = -np.inf
        # Degenerate case 2: second term vanishes and the first term
        # has a special form
        elif sigma == 0:
            if mu < np.any(f_min):
                log_ei[i] = np.log(f_min - mu)
            else:
                log_ei[i] = -np.inf
        # Normal case
        else:
            b = np.log(sigma) + norm.logpdf(z[i])
            # log(y+z) is tricky, we distinguish two cases:
            if np.any(f_min > mu):
                # When y>0, z>0, we define a=ln(y), b=ln(z).
                # Then y+z = exp[ max(a,b) + ln(1 + exp(-|b-a|)) ],
                # and thus log(y+z) = max(a,b) + ln(1 + exp(-|b-a|))
                a = np.log(f_min - mu) + norm.logcdf(z[i])
                log_ei[i] = max(a, b) + np.log(1 + np.exp(-abs(b - a)))
            else:
                # When y<0, z>0, we define a=ln(-y), b=ln(z), and it has
                # to be true that b >= a in order to satisfy y+z>=0.
                # Then y+z = exp(b) - exp(a) = exp[ b + ln(1 - exp(a-b)) ],
                # and thus log(y+z) = b + ln(1 - exp(a-b))
                a = np.log(mu - f_min) + norm.logcdf(z[i])
                if a >= b:
                    # a > b can only happen due to numerical inaccuracies
                    # or approximation errors
                    log_ei[i] = -np.inf
                else:
                    log_ei[i] = b + np.log(1 - np.exp(a - b))
    return log_ei
def rma_bg_correct(Y, make_copy=False):
    """RMA background correction.

    Parameters
    ----------
    Y: np.ndarray (ndim = 2, dtype = np.float32)
        The microarray intensity values (on a linear scale).
    make_copy: bool
        Whether or not to make a copy of the data or modify it in-place.
    """
    assert isinstance(Y, np.ndarray)

    if make_copy:
        Y = Y.copy()

    n = Y.shape[1]
    for j in range(n):
        # find missing data (= NaN)
        missing = np.isnan(Y[:, j])
        y = Y[~missing, j]

        ### estimate mu using simple binning (histogram)
        # use a fixed number of bins
        num_bins = 100
        lower = np.amin(y)
        upper = np.percentile(y, 75.0)
        bin_width = max(floor((upper - lower) / num_bins), 1.0)
        bin_edges = np.arange(lower, upper, bin_width)
        num_bins = bin_edges.size - 1
        # binning
        binned = np.digitize(y, bins=bin_edges) - 1
        binned = binned[binned < num_bins]
        bc = np.bincount(binned)
        amax = np.argmax(bc)
        max_x = lower + (amax + 0.5) * bin_width
        mu = max_x
        logger.debug('Mu: %.2f', mu)

        ### estimate sigma
        # 1. Select probes with values smaller than mu
        y_low = y[y < mu]
        # 2. Estimate their standard deviation (using mu as the mean)
        sigma = pow(np.sum(np.power(y_low - mu, 2.0)) / (y_low.size - 1), 0.5)
        # 3. Arbitrarily multiply the standard deviation by the square root of two
        sigma *= pow(2.0, 0.5)
        logger.debug('Sigma: %.2f', sigma)

        ### estimate alpha
        # we simply fix alpha to 0.03
        alpha = 0.03

        ### calculate background-corrected intensities
        # conditional mean of the signal: a + sigma * pdf(a/sigma) / cdf(a/sigma),
        # with the ratio computed in log space for stability
        a = y - mu - alpha * pow(sigma, 2.0)
        y_adj = a + sigma * np.exp(norm.logpdf(a / sigma) - norm.logcdf(a / sigma))
        Y[~missing, j] = y_adj

    return Y
def log_ZS_naive(params, trials=1000):
    (matrix, mu, Ne) = params
    nu = Ne - 1
    L = len(matrix)
    acc = 0.0
    for i in range(trials):
        ep = score_seq(matrix, random_site(L))
        acc += (1 / (1 + exp(ep - mu)))**(Ne - 1)
    mean_Zs = acc / trials
    return L * log(4) + log(mean_Zs)

def log_ZM_naive(params, N, trials=1000):
    (matrix, mu, Ne) = params
    return N * log_ZS_naive((matrix, mu, Ne), trials=1000)

def log_ZS_hack(params, N):
    (matrix, mu, Ne) = params
    L = len(matrix)
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log(Ne - 1), mat_mu, mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    return log_Zs

def log_ZM_hack(params, N):
    (matrix, mu, Ne) = params
    log_ZS = log_ZS_hack((matrix, mu, Ne), N)
    return N * log_ZS

def log_Z_hack(params, N):
    (matrix, mu, Ne) = params
    L = len(matrix)
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log(Ne - 1), mat_mu, mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    ans_ref = (N * L * log(4)) + log_perc_below_threshold
    ans = N * log_Zs
def log_mog_cdf(w, k_vec, mu_vec, sigma_vec):
    eps = 1.e-300
    # log CDF of a mixture of Gaussians: logsumexp of the component log CDFs,
    # weighted by the mixture coefficients via the b argument
    exp_term = norm.logcdf(w, loc=mu_vec, scale=sigma_vec + eps)
    coefficients = k_vec
    return logsumexp(exp_term, b=coefficients)
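# Usage sketch for log_mog_cdf above (an illustration with made-up mixture
# parameters; the function's module is assumed to import norm and logsumexp):
import numpy as np

k_vec = np.array([0.3, 0.7])  # mixture weights (sum to 1)
mu_vec = np.array([-1.0, 2.0])
sigma_vec = np.array([0.5, 1.5])
print(log_mog_cdf(0.0, k_vec, mu_vec, sigma_vec))
# = log( 0.3 * Phi((0 + 1) / 0.5) + 0.7 * Phi((0 - 2) / 1.5) )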