def posterior1(c):
    exp_c = np.exp(c)
    # Exponential priors on the natural parameters (two of them scaled by
    # exp_c[3]), plus the log-space Jacobian c[0] + c[1] + c[2] - c[3] + c[4].
    return (likelihood1(c)
            + expon.logpdf(float(exp_c[0] / exp_c[3]), loc=0, scale=10000)
            + expon.logpdf(float(exp_c[1]), loc=0, scale=10000)
            + expon.logpdf(float(exp_c[2]), loc=0, scale=10000)
            + expon.logpdf(float(exp_c[3]), loc=0, scale=10000)
            + expon.logpdf(float(exp_c[4] / exp_c[3]), loc=0, scale=10000)
            + c[0] + c[1] + c[2] - c[3] + c[4])

def posterior2(c):
    exp_c = np.exp(c)
    # Same exponential priors and log-space Jacobian as posterior1, with the
    # second model's likelihood.
    return (likelihood2(c)
            + expon.logpdf(float(exp_c[0] / exp_c[3]), loc=0, scale=10000)
            + expon.logpdf(float(exp_c[1]), loc=0, scale=10000)
            + expon.logpdf(float(exp_c[2]), loc=0, scale=10000)
            + expon.logpdf(float(exp_c[3]), loc=0, scale=10000)
            + expon.logpdf(float(exp_c[4] / exp_c[3]), loc=0, scale=10000)
            + c[0] + c[1] + c[2] - c[3] + c[4])

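# Both posteriors sample in log-space: c holds the logs of the natural
# parameters, so each exponential prior needs a change-of-variables Jacobian.
# Per component, log|d exp(c)/dc| = c, and for the ratio exp(c[0] - c[3]) the
# term is c[0] - c[3]; summing over the five priors gives exactly the trailing
# c[0] + c[1] + c[2] - c[3] + c[4]. A minimal numerical check of the scalar
# identity (values are hypothetical, not from the original source):
import numpy as np
from scipy.stats import expon

c, scale, eps = 0.7, 10000.0, 1e-6
# Density of c implied by an Exponential(scale) prior on x = exp(c),
# obtained by differentiating the CDF in c-space:
lhs = (expon.cdf(np.exp(c + eps), scale=scale)
       - expon.cdf(np.exp(c - eps), scale=scale)) / (2 * eps)
rhs = np.exp(expon.logpdf(np.exp(c), scale=scale) + c)
assert np.isclose(lhs, rhs, rtol=1e-4)
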
def log_prior(self, theta):
    """
    Returns the (log) prior probability of parameters theta.

    TODO NOTE: The returned quantity is an improper prior as its integral
    over the parameter space is not equal to 1.

    Parameters
    ----------
    theta : array_like
        An array giving the autocorrelation parameter(s).

    Returns
    -------
    log_p : float
        The (log) prior probability of parameters theta. An improper
        probability.
    """
    theta_gp, theta_nn = self._parse_theta(theta)

    if self.prior_nn_scale == np.inf:
        prior_nn = 0.0
    else:
        prior_nn = norm.logpdf(theta_nn, scale=self.prior_nn_scale).sum()

    if self.prior_gp_scale == np.inf:
        prior_gp = 0.0
    else:
        prior_gp = expon.logpdf(theta_gp, scale=self.prior_gp_scale).sum()

    return prior_nn + prior_gp

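# The np.inf branches above implement the improper, flat limit: as the scale
# grows, the log-density flattens out, and the constant is taken to be 0 so
# that the prior drops out of the posterior entirely. A standalone sketch of
# the same pattern (the helper name is illustrative, not the class's API):
import numpy as np
from scipy.stats import norm, expon

def _flat_or_logpdf(theta, scale, dist):
    # Flat (improper) prior when the scale is infinite; proper otherwise.
    if scale == np.inf:
        return 0.0
    return dist.logpdf(theta, scale=scale).sum()

theta = np.array([0.1, 0.5])
print(_flat_or_logpdf(theta, np.inf, norm))  # 0.0: the prior drops out
print(_flat_or_logpdf(theta, 1.0, expon))    # finite log-density
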
def log_exponential_density(X, rates):
    # Per-sample, per-component exponential log-densities for a mixture with
    # the given rate parameters (scipy parameterizes by scale = 1 / rate).
    n_samples = len(X)
    nmix = len(rates)
    log_prob = np.empty((n_samples, nmix))
    for c, rate in enumerate(rates):
        log_prob[:, c] = expon.logpdf(X, scale=1 / rate)
    return log_prob

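# A minimal usage sketch for the function above (data and rates are
# hypothetical). Each column holds one component's log-density
# log f(x; rate) = log(rate) - rate * x for x >= 0, the building block of a
# mixture-model E-step.
import numpy as np
from scipy.stats import expon

X = np.array([0.2, 1.5, 3.0])
rates = [2.0, 0.5]
log_prob = log_exponential_density(X, rates)  # shape (3, 2)
assert np.allclose(log_prob[:, 0], np.log(2.0) - 2.0 * X)
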
def log_prior(logL0, a, b, B_l, sigma, M):
    if any(x < 0 for x in (logL0, sigma)):
        return -np.inf
    if np.any(M < 0):  # masses have to be positive
        return -np.inf

    t1 = np.arctan(B_l)
    t2 = np.arctan(a)
    # np.arctan always lies in (-pi/2, pi/2), so this guard never triggers
    if any(x < -np.pi / 2 or x > np.pi / 2 for x in (t1, t2)):
        return -np.inf

    # Hyperparameters
    lambda_logL0 = 1.0
    sigma_a, sigma_b = 1, 1

    p = 0
    # Exponential in logL0
    p += expon.logpdf(logL0, scale=1 / lambda_logL0)
    # Uniform in arctan(B_l) and arctan(a)
    p += 2 * np.log(2 / np.pi)
    # Flat prior for b: no idea what it should be, so add nothing
    # Negated Gamma log-density on sigma**2 as an inverse-gamma-like prior
    p -= gamma.logpdf(sigma ** 2, sigma_a, scale=sigma_b)
    return p

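# For reference (not from the original source): a prior that is uniform in
# arctan(b) is a Cauchy prior in b itself, since d(arctan b)/db = 1/(1+b^2).
# The code's 2/pi constant corresponds to a half-range of angles; either way
# the constant is irrelevant for MCMC. A quick check of the identity:
import numpy as np
from scipy.stats import cauchy

b = 1.7
# Uniform over the full (-pi/2, pi/2) range maps to the standard Cauchy:
assert np.isclose(cauchy.logpdf(b), -np.log(np.pi) - np.log1p(b ** 2))
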
def llf(self, n, beta, gamma, simulation_index):
    """
    Compute the log-likelihood of the simulation specified by
    `simulation_index` given the parameters N, :math:`\\beta`, and
    :math:`\\gamma`.

    Parameters
    ----------
    n : float
        Total number of individuals in the SIR model.
    beta : float
        Parameter beta in the SIR model. See corresponding argument in
        :meth:`simulate` for more information.
    gamma : float
        Parameter gamma in the SIR model. See corresponding argument in
        :meth:`simulate` for more information.
    simulation_index : int, 0 <= `simulation_index` < `len(self.data_)`
        Index specifying the simulation in `self.data_`.

    Returns
    -------
    llf : float
        The log-likelihood.

    Notes
    -----
    The main difference to the API of the R code is the missing I.0
    parameter, which is not used by the R code either.
    """
    simulation = self.data_[simulation_index]
    llf = 0
    state_new = simulation.iloc[0]
    t_new = state_new["t"]
    for j in range(1, simulation.shape[0]):
        t_old = t_new
        state_old = state_new
        state_new = dict(simulation.iloc[j])
        t_new = state_new["t"]
        rate_i, rate_r, birth_death_rate_i, birth_rate_r, change_rate = \
            self._rates_sir(state_old, beta, gamma, n)
        if change_rate == 0:
            break
        # likelihood of waiting the time we waited
        # NOTE: R code uses `rate=change_rate` while in Python it is
        # `scale=1/change_rate`
        llf += expon.logpdf(t_new - t_old, scale=1 / change_rate)
        # likelihood of observing the event we observed
        if _infection(state_old, state_new):
            llf += log(rate_i / change_rate)
        elif _recovery(state_old, state_new):
            llf += log(rate_r / change_rate)
        elif _death_birth_i(state_old, state_new):
            llf += log(birth_death_rate_i / change_rate)
        elif _birth_r(state_old, state_new):
            llf += log(birth_rate_r / change_rate)
            n += 1
    return llf

def llf(observation, n, beta, sigma, gamma, verbose):
    """
    Compute the log-likelihood of `observation` given the parameters N,
    :math:`\\beta`, :math:`\\sigma`, and :math:`\\gamma`.

    Parameters
    ----------
    observation : pandas.DataFrame
        DataFrame representing the observed SEIR model. Columns are "s",
        "i", "r", and "t" for the compartments S, I, and R and the time,
        respectively.
    n : int or float
        Total number of individuals in the SEIR model.
    beta : float
        Parameter beta in the SEIR model. See corresponding argument in
        :meth:`simulate` for more information.
    sigma : float
        Parameter sigma in the SEIR model. See corresponding argument in
        :meth:`simulate` for more information.
    gamma : float
        Parameter gamma in the SEIR model. See corresponding argument in
        :meth:`simulate` for more information.
    verbose : bool
        If True, print debug output.

    Returns
    -------
    llf : float
        The log-likelihood.
    """
    llf = 0
    state_new = observation.iloc[0]
    t_new = state_new["t"]
    for j in range(1, observation.shape[0]):
        if verbose:
            print(state_new)
        t_old = t_new
        state_old = state_new
        state_new = dict(observation.iloc[j])
        t_new = state_new["t"]
        rate_e, rate_i, rate_r, change_rate = _rates_seir(
            state_old, beta, sigma, gamma, n)
        if change_rate == 0:
            break
        # expon.pdf(t_new - t_old, scale=1 / change_rate) gives the
        # likelihood of waiting the time we waited
        # NOTE: R code uses `rate=change_rate` while in Python it is
        # `scale=1/change_rate`
        llf += expon.logpdf(t_new - t_old, scale=1 / change_rate)
        # likelihood of observing the event we observed
        if _infection_latent(state_old, state_new):
            llf += log(rate_e / change_rate)
        elif _infection_active(state_old, state_new):
            llf += log(rate_i / change_rate)
        elif _recovery(state_old, state_new):
            llf += log(rate_r / change_rate)
    return llf

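# The NOTE in both llf implementations above flags the parameterization
# difference: R's dexp(x, rate = r, log = TRUE) matches scipy's
# expon.logpdf(x, scale = 1 / r); both evaluate to log(r) - r * x for x >= 0.
# A quick check with hypothetical numbers:
import numpy as np
from scipy.stats import expon

x, r = 0.3, 4.0
assert np.isclose(expon.logpdf(x, scale=1 / r), np.log(r) - r * x)
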
def test_logprob(self):
    lam = torch.exp(Variable(torch.randn(100)))
    value = Variable(torch.randn(100))
    dist = Exponential(lam)

    # test log probability
    res1 = dist.log_prob(value).data
    res2 = expon.logpdf(value.data.numpy(), scale=1.0 / lam.data.numpy())
    # scipy yields -inf outside the support; map it to the library's LOG_0
    res2[np.isinf(res2)] = dist.LOG_0
    self.assertEqual(res1, res2)

def __call__(self, x):
    """
    Args:
        x (float): weight

    Returns:
        Log probability for weight
    """
    if not isinstance(x, np.ndarray):
        x = np.array(x)
    # The sign flip makes this an exponential prior over whichever half-line
    # contains self.mu (positive or negative weights).
    return expon.logpdf(np.sign(self.mu) * x, scale=np.sign(self.mu) * self.mu)

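# Sketch of the sign trick above with a hypothetical negative mean: the prior
# becomes an exponential over the negative half-line, and weights of the
# wrong sign fall outside the support.
import numpy as np
from scipy.stats import expon

mu = -2.0
x = np.array([-0.5, -3.0, 1.0])
logp = expon.logpdf(np.sign(mu) * x, scale=np.sign(mu) * mu)
print(logp)  # finite, finite, -inf
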
def PdistBIC(Stat_Stn):
    def BIC(n, k, lnL):
        return np.log(n) * k - 2 * lnL  # BIC = ln(n)k - 2ln(L)

    Pexpon = []
    Pgamma = []
    Pweibull = []
    Plognorm = []
    MonthlyStat = Stat_Stn["MonthlyStat"]
    Prep = Stat_Stn["PrepDF"]["P"]
    for m in range(0, 12):
        Prep_m = Prep[Prep.index.month == (m + 1)]
        Prep_m = Prep_m[Prep_m > 0]
        n = len(Prep_m)
        coef1 = MonthlyStat.loc[m + 1, "exp"]
        coef2 = MonthlyStat.loc[m + 1, "gamma"]
        coef3 = MonthlyStat.loc[m + 1, "weibull"]
        coef4 = MonthlyStat.loc[m + 1, "lognorm"]
        # NOTE: k=1 is passed for every candidate, even though the gamma,
        # weibull, and lognorm fits have more than one free parameter.
        Pexpon.append(
            BIC(n, 1, np.sum(expon.logpdf(Prep_m, coef1[0], coef1[1]))))
        Pgamma.append(
            BIC(n, 1, np.sum(gamma.logpdf(Prep_m, coef2[0], coef2[1],
                                          coef2[2]))))
        Pweibull.append(
            BIC(n, 1, np.sum(weibull_min.logpdf(Prep_m, coef3[0], coef3[1],
                                                coef3[2]))))
        Plognorm.append(
            BIC(n, 1, np.sum(norm.logpdf(np.log(Prep_m), coef4[0],
                                         coef4[1]))))
    data = {
        "exp": Pexpon,
        "gamma": Pgamma,
        "weibull": Pweibull,
        "lognorm": Plognorm
    }
    StatPdistBIC = pd.DataFrame(data, columns=data.keys(),
                                index=np.arange(1, 13))
    return StatPdistBIC

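# A self-contained sketch of the same model comparison on synthetic data
# (all values hypothetical). BIC = k ln(n) - 2 ln(L); lower is better, and k
# should count the fitted parameters of each candidate distribution.
import numpy as np
from scipy.stats import expon, gamma

def bic(n, k, lnL):
    return np.log(n) * k - 2 * lnL

rng = np.random.default_rng(0)
x = rng.gamma(shape=2.0, scale=1.5, size=500)  # synthetic positive data

# Fit both candidates with loc fixed at 0 for positive data, then compare.
loc_e, scale_e = expon.fit(x, floc=0)
a_g, loc_g, scale_g = gamma.fit(x, floc=0)
bic_expon = bic(len(x), 1, expon.logpdf(x, loc_e, scale_e).sum())
bic_gamma = bic(len(x), 2, gamma.logpdf(x, a_g, loc_g, scale_g).sum())
print(bic_expon, bic_gamma)  # gamma should win on gamma-distributed data
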
def log_prior(a, b, sigma):
    if any(x < 0 for x in (a, sigma)):
        return -np.inf
    t = np.arctan(b)
    if t < -np.pi / 2 or t > np.pi / 2:
        return -np.inf

    # Hyperparameters
    lambda_a = 1.0
    sigma_a, sigma_b = 1, 1

    p = 0
    # Exponential in log(a)  (changed a => log(a); TODO: rename the variable?)
    p += expon.logpdf(np.log(a), scale=1 / lambda_a)
    # Uniform in arctan(b)
    p += np.log(2 / np.pi)
    # Negated Gamma log-density on sigma as an inverse-gamma-like prior
    p -= gamma.logpdf(sigma, sigma_a, scale=sigma_b)
    return p

def features_to_gaussian(header, row, limits):
    # Does this look like a mean-variance feature file?
    if len(header) == 3:
        mean = None
        if 'mean' in header:
            mean = float(row[header.index('mean')])
        if 'mode' in header:
            mean = float(row[header.index('mode')])
        if .5 in header:
            mean = float(row[header.index(.5)])
        if mean is None:
            return None

        if 'var' in header:
            var = float(row[header.index('var')])
        elif 'sdev' in header:
            var = float(row[header.index('sdev')]) * float(row[header.index('sdev')])
        else:
            return None

        if np.isnan(var) or var == 0:
            return SplineModelConditional.make_single(mean, mean, [])

        # This might be uniform
        if mean - 2 * var < limits[0] or mean + 2 * var > limits[1]:
            return None

        return SplineModelConditional.make_gaussian(limits[0], limits[1], mean, var)
    elif len(header) == 4:
        # Does this look like a mean and evenly spaced p-values?
        header = header[1:]  # Make a copy of the list
        row = row[1:]
        mean = None
        if 'mean' in header:
            mean = float(row.pop(header.index('mean')))
            header.remove('mean')
        elif 'mode' in header:
            mean = float(row.pop(header.index('mode')))
            header.remove('mode')
        elif .5 in header:
            mean = float(row.pop(header.index(.5)))
            header.remove(.5)
        else:
            return None

        # Check that the two other values are evenly spaced p-values
        row = list(map(float, row[0:2]))
        if np.all(np.isnan(row)):
            return SplineModelConditional.make_single(mean, mean, [])

        if header[1] == 1 - header[0] and abs(row[1] - mean - (mean - row[0])) < abs(row[1] - row[0]) / 1000.0:
            lowp = min(header)
            lowv = np.array(row)[np.array(header) == lowp][0]
            if lowv == mean:
                return SplineModelConditional.make_single(mean, mean, [])
            lowerbound = 1e-4 * (mean - lowv)
            upperbound = np.sqrt((mean - lowv) / lowp)
            sdev = brentq(lambda sdev: norm.cdf(lowv, mean, sdev) - lowp,
                          lowerbound, upperbound)
            if float(limits[0]) < mean - 3 * sdev and float(limits[1]) > mean + 3 * sdev:
                return SplineModelConditional.make_gaussian(limits[0], limits[1], mean, sdev * sdev)
            else:
                return None
        else:
            # Heuristic best curve: known tails, fit to mean
            lowp = min(header)
            lowv = np.array(row)[np.array(header) == lowp][0]
            lowerbound = 1e-4 * (mean - lowv)
            upperbound = np.log((mean - lowv) / lowp)
            low_sdev = brentq(lambda sdev: norm.cdf(lowv, mean, sdev) - lowp,
                              lowerbound, upperbound)
            if float(limits[0]) > mean - 3 * low_sdev:
                return None
            low_segment = SplineModelConditional.make_gaussian(
                float(limits[0]), lowv, mean, low_sdev * low_sdev)

            highp = max(header)
            highv = np.array(row)[np.array(header) == highp][0]
            lowerbound = 1e-4 * (highv - mean)
            upperbound = np.log((highv - mean) / (1 - highp))
            high_scale = brentq(
                lambda scale: .5 + expon.cdf(highv, mean, scale) / 2 - highp,
                lowerbound, upperbound)
            if float(limits[1]) < mean + 3 * high_scale:
                return None
            # Construct exponential, starting at mean, with full cdf of .5
            high_segment = SplineModelConditional.make_single(
                highv, float(limits[1]),
                [np.log(1 / high_scale) + np.log(.5) + mean / high_scale,
                 -1 / high_scale])

            sevenys = np.linspace(lowv, highv, 7)
            ys = np.append(sevenys[0:2], [mean, sevenys[-2], sevenys[-1]])
            lps0 = norm.logpdf(ys[0:2], mean, low_sdev)
            lps1 = expon.logpdf([ys[-2], ys[-1]], mean, high_scale) + np.log(.5)
            result = minimize(
                lambda lpmean: FeaturesInterpreter.skew_gaussian_evaluate(
                    ys, np.append(np.append(lps0, [lpmean]), lps1),
                    low_segment, high_segment, mean, lowp, highp),
                .5, method='Nelder-Mead')
            print(np.append(np.append(lps0, result.x), lps1))
            return FeaturesInterpreter.skew_gaussian_construct(
                ys, np.append(np.append(lps0, result.x), lps1),
                low_segment, high_segment)

def __call__(self, param):
    # expon.logpdf already returns -inf outside the support; the explicit
    # guard keeps the return value a scalar -inf for array inputs.
    if np.any(param < 0):
        return -np.inf
    return expon.logpdf(param, scale=self.scale)

def log_likelihood(self, data):
    # Shifted exponential: observations in `data` occur after onset time t
    # with mean waiting time self.tau.
    t = self.params[0]
    return expon.logpdf(data - t, scale=self.tau)

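# Shifting the data by the onset time t is equivalent to scipy's loc
# parameter; a quick check with hypothetical values:
import numpy as np
from scipy.stats import expon

t, tau = 2.0, 0.5
data = np.array([2.1, 2.4, 3.0])
assert np.allclose(expon.logpdf(data - t, scale=tau),
                   expon.logpdf(data, loc=t, scale=tau))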