def test_lstats_gamma(self):
    "Try lstats on the gamma distribution"
    _gam = [1., 0.5, 0.33333334, 0.16666667]
    assert_almost_equal(dist.gamma.lstats(4, 1), np.array(_gam))
    assert_almost_equal(dist.gamma(1.).lstats(4), np.array(_gam))
    _gam = [10.0, 1.76197052, 0.10350336, 0.12585956]
    assert_almost_equal(dist.gamma.lstats(4, 10.), np.array(_gam))
    assert_almost_equal(dist.gamma(10.).lstats(4), np.array(_gam))
def get_generalized_distribution(self, params, scale=1, exog=None,
                                 exposure=None, offset=None):
    """
    Returns a random number generator for the predictive distribution.

    Parameters
    ----------
    params : array-like
        The model parameters.
    scale : scalar
        The scale parameter.
    exog : array-like
        The predictor variable matrix.

    Returns a frozen random number generator object.  Use the ``rvs``
    method to generate random values.

    Notes
    -----
    Due to the behavior of ``scipy.stats.distributions`` objects, the
    returned random number generator must be called with ``gen.rvs(n)``
    where ``n`` is the number of observations in the data set used to fit
    the model.  If any other value is used for ``n``, misleading results
    will be produced.
    """
    fit = self.predict(params, exog, exposure, offset, linear=False)

    import scipy.stats.distributions as dist

    if isinstance(self.family, families.Gaussian):
        return dist.norm(loc=fit, scale=np.sqrt(scale))

    elif isinstance(self.family, families.Binomial):
        return dist.binom(n=1, p=fit)

    elif isinstance(self.family, families.Poisson):
        return dist.poisson(mu=fit)

    elif isinstance(self.family, families.Gamma):
        alpha = fit / float(scale)
        return dist.gamma(alpha, scale=scale)

    else:
        raise ValueError("get_generalized_distribution not implemented "
                         "for %s" % self.family.name)
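# Hedged usage sketch (not from the original module): it assumes the method
# above is attached to a statsmodels GLM model class; the data, the Poisson
# family, and the variable names below are illustrative placeholders.  Per the
# Notes in the docstring, ``rvs`` must be called with n equal to the number of
# observations used to fit the model.
import numpy as np
import statsmodels.api as sm

x = np.random.uniform(size=(100, 2))
y = np.random.poisson(lam=np.exp(np.dot(x, [0.5, -0.25])))
model = sm.GLM(y, x, family=sm.families.Poisson())
results = model.fit()

gen = model.get_generalized_distribution(results.params)
simulated = gen.rvs(len(y))  # one predictive draw per observation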
def init_distributions(pkey, kind='dpm', mu=None, sigma=None, nrvs=25, tb=.65):
    """ sample random parameter sets to explore global minima
    (called by Optimizer method __hop_around__())
    """
    if mu is None:
        mu = {'a': .15, 'tr': .02, 'v': 1., 'ssv': -1., 'z': .1,
              'xb': 1., 'sso': .15, 'vi': .35, 'vd': .5}
    if sigma is None:
        sigma = {'a': .35, 'tr': .25, 'v': .5, 'ssv': .5, 'z': .05,
                 'xb': .5, 'sso': .01, 'vi': .4, 'vd': .5}
    normal_params = ['tr', 'v', 'vd', 'ssv', 'z', 'xb', 'sso']
    gamma_params = ['a', 'tr']
    uniform_params = ['vd', 'vi']
    if 'race' in kind:
        sigma['ssv'] = abs(mu['ssv'])

    bounds = get_bounds(kind=kind)[pkey]
    loc = mu[pkey]
    scale = sigma[pkey]

    # init and freeze dist shape
    if pkey in normal_params:
        dist = norm(loc, scale)
    elif pkey in gamma_params:
        dist = gamma(1.0, loc, scale)
    elif pkey in uniform_params:
        dist = uniform(loc, scale)

    # generate random variates
    rvinits = dist.rvs(nrvs)
    while rvinits.min() < bounds[0]:
        # apply lower limit
        ix = rvinits.argmin()
        rvinits[ix] = dist.rvs()
    while rvinits.max() > bounds[1]:
        # apply upper limit
        ix = rvinits.argmax()
        rvinits[ix] = dist.rvs()
    if pkey == 'tr':
        rvinits = np.abs(rvinits)
    return rvinits
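# Hedged usage sketch: `get_bounds`, `norm`, `gamma`, and `uniform` are assumed
# to be available in the surrounding module (the function above relies on them),
# so this only illustrates the intended call pattern.
v_inits = init_distributions('v', kind='dpm', nrvs=10)   # drift-rate starting values
a_inits = init_distributions('a', kind='dpm', nrvs=10)   # boundary starting values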
def segment(image, n_segments=2, burn_in=1000, samples=1000, lag=5):
    """
    Return image segment samples.

    Parameters
    ----------
    image : (N, M) ndarray
        Pixel array with single-dimension values (e.g. hue)

    Returns
    -------
    labels : (samples, N, M) ndarray
        The image segment label array
    emission_params : (samples, K, 6) ndarray
        The Gaussian emission distribution parameters
        (mean, precision) for each of the R, G, B channels
    log_probs : (samples,) ndarray
    """
    # allocate arrays
    res_labels = zeros((samples, image.shape[0], image.shape[1]), dtype=int)
    res_emission_params = zeros((samples, n_segments, 6))
    res_log_prob = zeros((samples,))

    padded_labels = ones((image.shape[0] + 2, image.shape[1] + 2), dtype=int)*-1
    labels = padded_labels[1:-1, 1:-1]
    emission_params = zeros((n_segments, 6))
    log_prob = None
    conditional = zeros((n_segments,))

    # init emission_params
    sample_mean_r = image[:, :, 0].mean()
    sample_mean_g = image[:, :, 1].mean()
    sample_mean_b = image[:, :, 2].mean()
    sample_var_r = image[:, :, 0].var()
    sample_var_g = image[:, :, 1].var()
    sample_var_b = image[:, :, 2].var()
    sample_prec_r = 1./sample_var_r
    sample_prec_g = 1./sample_var_g
    sample_prec_b = 1./sample_var_b
    for k in xrange(n_segments):
        """
        emission_params[k, 0] = norm.rvs(sample_mean_r,
                                         sqrt(sample_var_r/n_segments))
        emission_params[k, 1] = sample_prec_r
        emission_params[k, 2] = norm.rvs(sample_mean_g,
                                         sqrt(sample_var_g/n_segments))
        emission_params[k, 3] = sample_prec_g
        emission_params[k, 4] = norm.rvs(sample_mean_b,
                                         sqrt(sample_var_b/n_segments))
        emission_params[k, 5] = sample_prec_b
        """
        emission_params[k, 0] = norm.rvs(0.5, 0.1)
        emission_params[k, 1] = 1/(0.25**2)
        emission_params[k, 2] = norm.rvs(0.5, 0.1)
        emission_params[k, 3] = 1/(0.25**2)
        emission_params[k, 4] = norm.rvs(0.5, 0.1)
        emission_params[k, 5] = 1/(0.25**2)

    # init labels
    for n in xrange(image.shape[0]):
        for m in xrange(image.shape[1]):
            labels[n, m] = randint(0, n_segments)

    try:
        # gibbs
        for i in xrange(burn_in + samples*lag - (lag - 1)):
            for n in xrange(image.shape[0]):
                for m in xrange(image.shape[1]):
                    # resample label
                    for k in xrange(n_segments):
                        labels[n, m] = k
                        conditional[k] = 0.
                        conditional[k] += phi_blanket(
                            memoryview(padded_labels), n, m, memoryview(FS))
                        """
                        for x in xrange(max(n-2, 0), min(n+3, image.shape[0])):
                            for y in xrange(max(m-2, 0), min(m+3, image.shape[1])):
                                clique = padded_labels[x:x+3, y:y+3]
                                conditional[k] += phi(clique)
                        """
                        mean_r = emission_params[k, 0]
                        var_r = 1./emission_params[k, 1]
                        mean_g = emission_params[k, 2]
                        var_g = 1./emission_params[k, 3]
                        mean_b = emission_params[k, 4]
                        var_b = 1./emission_params[k, 5]
                        conditional[k] += log(norm.pdf(image[n, m, 0],
                                                       mean_r, sqrt(var_r)))
                        conditional[k] += log(norm.pdf(image[n, m, 1],
                                                       mean_g, sqrt(var_g)))
                        conditional[k] += log(norm.pdf(image[n, m, 2],
                                                       mean_b, sqrt(var_b)))
                    labels[n, m] = sample_categorical(conditional)

            for k in xrange(n_segments):
                mask = (labels == k)

                # resample label mean red
                mean_r = emission_params[k, 0]
                prec_r = emission_params[k, 1]
                numer_r = TAU_0*MU_0 + prec_r*sum(image[mask][:, 0])
                denom_r = TAU_0 + prec_r*sum(mask)
                post_mean_r = numer_r/denom_r
                post_var_r = 1./denom_r
                emission_params[k, 0] = norm.rvs(post_mean_r, sqrt(post_var_r))

                # resample label var red
                post_alpha_r = ALPHA_0 + sum(mask)/2.
                post_beta_r = BETA_0 + sum((image[mask][:, 0] - emission_params[k, 0])**2)/2.
                post_r = gamma(post_alpha_r, scale=1./post_beta_r)
                emission_params[k, 1] = post_r.rvs()

                # resample label mean green
                mean_g = emission_params[k, 2]
                prec_g = emission_params[k, 3]
                numer_g = TAU_0*MU_0 + prec_g*sum(image[mask][:, 1])
                denom_g = TAU_0 + prec_g*sum(mask)
                post_mean_g = numer_g/denom_g
                post_var_g = 1./denom_g
                emission_params[k, 2] = norm.rvs(post_mean_g, sqrt(post_var_g))

                # resample label var green
                post_alpha_g = ALPHA_0 + sum(mask)/2.
                post_beta_g = BETA_0 + sum((image[mask][:, 1] - emission_params[k, 2])**2)/2.
                post_g = gamma(post_alpha_g, scale=1./post_beta_g)
                emission_params[k, 3] = post_g.rvs()

                # resample label mean blue
                mean_b = emission_params[k, 4]
                prec_b = emission_params[k, 5]
                numer_b = TAU_0*MU_0 + prec_b*sum(image[mask][:, 2])
                denom_b = TAU_0 + prec_b*sum(mask)
                post_mean_b = numer_b/denom_b
                post_var_b = 1./denom_b
                emission_params[k, 4] = norm.rvs(post_mean_b, sqrt(post_var_b))

                # resample label var blue
                post_alpha_b = ALPHA_0 + sum(mask)/2.
                post_beta_b = BETA_0 + sum((image[mask][:, 2] - emission_params[k, 4])**2)/2.
                post_b = gamma(post_alpha_b, scale=1./post_beta_b)
                emission_params[k, 5] = post_b.rvs()

            log_prob = 0.
            for n in xrange(image.shape[0]):
                for m in xrange(image.shape[1]):
                    #clique = padded_labels[n:n+3, m:m+3]
                    label = labels[n, m]
                    mean_r = emission_params[label, 0]
                    var_r = 1./emission_params[label, 1]
                    mean_g = emission_params[label, 2]
                    var_g = 1./emission_params[label, 3]
                    mean_b = emission_params[label, 4]
                    var_b = 1./emission_params[label, 5]
                    #log_prob += phi(clique)
                    log_prob += log(norm.pdf(image[n, m, 0], mean_r, sqrt(var_r)))
                    log_prob += log(norm.pdf(image[n, m, 1], mean_g, sqrt(var_g)))
                    log_prob += log(norm.pdf(image[n, m, 2], mean_b, sqrt(var_b)))

            # prior on theta?
            log_prob += phi_all(memoryview(padded_labels), memoryview(FS))

            sys.stdout.write('\riter {} log_prob {}'.format(i, log_prob))
            sys.stdout.flush()

            if i < burn_in:
                pass
            elif not (i - burn_in) % lag:
                res_i = (i - burn_in)/lag
                res_emission_params[res_i] = emission_params[:]
                res_labels[res_i] = labels
                res_log_prob[res_i] = log_prob

        sys.stdout.write('\n')
        return res_labels, res_emission_params, res_log_prob
    except KeyboardInterrupt:
        return res_labels, res_emission_params, res_log_prob
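# Minimal standalone sketch (not part of the sampler above) of the conjugate
# update used for each per-channel precision: with a Gamma(ALPHA_0, BETA_0)
# prior on the precision and normal pixel likelihoods, the posterior is
# Gamma(ALPHA_0 + n/2, BETA_0 + sum of squared residuals / 2).  The values
# below are illustrative placeholders, not taken from the segmentation code.
import numpy as np
from scipy.stats import gamma, norm

ALPHA_0, BETA_0 = 1.0, 1.0
pixels = norm.rvs(0.5, 0.1, size=200)      # stand-in for image[mask][:, 0]
channel_mean = pixels.mean()               # stand-in for the resampled mean

post_alpha = ALPHA_0 + len(pixels) / 2.
post_beta = BETA_0 + np.sum((pixels - channel_mean)**2) / 2.
precision_sample = gamma(post_alpha, scale=1. / post_beta).rvs()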
boots = 1000
real_data = data[state].values
df = pd.DataFrame()
df['active'] = real_data
df.to_csv('temp_files/dataset.csv', index=False)
try:
    means = np.load("Output/Rt/States/Rt_" + state + "_means.npy")
    sds = np.load("Output/Rt/States/Rt_" + state + "_sds.npy")
    dat_means = np.load("Output/Rt/States/" + state + "_means.npy")
    dat_sds = np.load("Output/Rt/States/" + state + "_sds.npy")
except:
    rt = []
    dats = []
    for n in range(boots):
        print("Iteration: ", n + 1, end='\r')
        G = gamma(3.325 + 0.616 * np.random.normal(),
                  0.979 + 0.195 * np.random.normal())
        dataset = np.copy(real_data)
        for i in range(len(dataset)):
            send_back = np.clip(np.round(G.rvs(int(dataset[i]))), 0, 10)
            send_back = send_back[i - send_back >= 0]
            dataset[i] = 0
            for j in np.unique(np.int32(send_back)):
                dataset[i - j] += np.sum(send_back == j)
        df = pd.DataFrame()
        df['active'] = dataset[:-10]
        dats.append(dataset[:-10])
        df.to_csv('temp_files/dataset.csv', index=False)
        call(['RScript.exe', 'Rscripts/Rt-bootstrap (Uncorrected).R'])
        rt.append(pd.read_csv('temp_files/rtoutput.csv'))
    means = np.array([x["Mean(R)"].values for x in rt])
def between(*quantiles):
    parameters = distributions.gamma.fit(polygon(*quantiles))
    distribution = distributions.gamma(*parameters)
    return RandomVariable(distribution)
def gamma(alpha=1.0, beta=1.0):
    """
    alpha = k
    beta = 1/theta
    """
    return dists.gamma(alpha, scale=1. / beta)
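# Hedged check of the shape/rate parameterization documented above: with
# shape alpha = k and rate beta = 1/theta, the frozen distribution has
# mean alpha/beta and variance alpha/beta**2.  Illustrative only.
d = gamma(alpha=3.0, beta=2.0)
assert abs(d.mean() - 3.0 / 2.0) < 1e-12
assert abs(d.var() - 3.0 / 2.0**2) < 1e-12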
def particle_filter_detector(ser1, taps, models):
    # particle : (id, rate, censor, last_censor, prev_particle)

    # Model parameters
    normal_std_factor = 4
    censorship_std_factor = 7
    censorship_prior_model = 0.01
    change_tap_prior_model = 0.1

    # Sampling parameters
    change_tap_sample = 0.2
    censorship_prior_sample = 0.3
    particle_number = 1000
    mult_particles = 1

    # Check consistency once
    for t in models:
        assert len(ser1) == len(models[t])

    # Clean up a bit the data
    series2 = []
    last = None
    first = None

    # Process series
    for s in ser1:
        if s == None:
            series2 += [last]
        else:
            if first == None:
                first = s
            series2 += [s]
            last = s
    series2 = [s if s != None else first for s in series2]
    series = series2

    # Data structures to keep logs
    particles = {}
    outputlog = [(series[0], series[0])]

    # Initial particles:
    particles[0] = []
    G = gamma(max(1, series[0]), 1)
    for pi, r in enumerate(G.rvs(particle_number)):
        particles[0] += [(pi, r, False, None, 0, random.choice(taps), False)]

    # Now run the sampler for all times
    for pi in range(1, len(series)):
        assert models != None
        assert taps != None

        # Normal distributions from taps and the model standard deviation
        # for normality and censorship
        round_models = {}
        for ti in taps:
            NoCensor = norm(models[ti][pi][0],
                            (models[ti][pi][1] * normal_std_factor)**2)
            Censor = norm(models[ti][pi][0],
                          (models[ti][pi][1] * censorship_std_factor)**2)
            round_models[ti] = (NoCensor, Censor)

        # Store for expanded pool of particles
        temporary_particles = []

        # Expand the distribution
        for p in particles[pi-1]:
            p_old, C_old, j = tracebackp(particles, p, pi-1,
                                         p[5] - 1)  # taps[0] - 1)

            # Serial number of old particle
            p_old_num = None
            if p_old != None:
                p_old_num = p_old[0]

            # Create a number of candidate particles from each previous particle
            for _ in range(mult_particles):
                # Sample a new tap for the candidate particle
                new_tap = p[5]
                if random.random() < change_tap_sample:
                    new_tap = random.choice(taps)

                # Update this censorship flag
                C = False
                if random.random() < censorship_prior_sample:
                    C = True

                # Determine new rate
                new_p = None
                if p_old == None:
                    new_p = p[1]  # continue as before
                if C | C_old:
                    while new_p == None or new_p < 0:
                        new_p = p_old[1] * (1 + round_models[new_tap][1].rvs(1)[0])  ## censor models
                else:
                    while new_p == None or new_p < 0:
                        new_p = p_old[1] * (1 + round_models[new_tap][0].rvs(1)[0])  ## no censor models

                # Build and register new particle
                newpi = (None, new_p, C, p[0], pi, new_tap, C | C_old)
                temporary_particles += [newpi]

        # Assign a weight to each sampled candidate particle
        weights = []
        for px in temporary_particles:
            wx = 1.0

            # Adjust weight to observation
            if not series[pi] == None:
                poisson_prob = poisson.pmf(series[pi], px[1])
                #print poisson_prob, px
                wx *= poisson_prob

            # Adjust the probability of censorship
            if px[2]:
                wx *= censorship_prior_model / censorship_prior_sample
            else:
                wx *= (1 - censorship_prior_model) / (1 - censorship_prior_sample)

            # Adjust the probability of changing the tap
            if px[5] == particles[pi-1][px[3]][5]:
                wx *= (1 - change_tap_prior_model) / ((1 - change_tap_sample) + change_tap_sample*(1.0 / len(taps)))
            else:
                wx *= change_tap_prior_model / (1 - ((1 - change_tap_sample) + change_tap_sample*(1.0 / len(taps))))
            weights += [wx]

        weights_sum = sum(weights)

        ## Resample according to weight
        particles[pi] = []
        for pid in range(particle_number):
            px = samplep(weights, weights_sum, temporary_particles)
            px = (pid, px[1], px[2], px[3], px[4], px[5], px[6])
            particles[pi] += [px]

        ## Collect some statistics

        ## stats
        Ci = 0
        mean = 0
        for px in particles[pi]:
            if px[2]:
                Ci += 1
            mean += px[1]
        mean = mean / len(particles[pi])

        # Diversity
        Div = len(set([pv[3] for pv in particles[pi]]))

        # Range of values
        range_normal = sorted([pn[1] for pn in temporary_particles if not pn[2]])
        Base = range_normal[len(range_normal)/2]
        Mn = range_normal[len(range_normal)*1/100]
        Mx = range_normal[len(range_normal)*99/100]
        outputlog += [(Mn, Mx)]

        # How many are using the censorship model at any time?
        censor_model_stat = len([1 for pn in particles[pi] if pn[6]]) * 100 / len(particles[pi])

        # Build histogram of taps
        tap_hist = {}
        for px in particles[pi]:
            tap_hist[px[5]] = tap_hist.get(px[5], 0) + 1

        print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
            pi, Ci, mean, series[pi], tap_hist, Base, Mn, Mx, Div, censor_model_stat)
        # print " [%s - %s]" % (key_series_point*(1+NoCensor.ppf(0.00001)),
        #                       key_series_point*(1+NoCensor.ppf(0.99999)))

    return particles, outputlog
import pandas as pd
import numpy as np
from scipy.stats.distributions import expon, gamma, rayleigh, norm, t, uniform

from posteriori import between


def RMSE(predicted, expected):
    return np.linalg.norm(predicted - expected) / np.sqrt(len(predicted))


distributions = [
    norm(),
    t(df=5),
    gamma(a=2),
    gamma(a=4),
    gamma(a=8),
    expon(scale=1/0.5),
    expon(scale=1/1),
    expon(scale=1/2),
    rayleigh(),
    uniform(),
]

errors = []
for distribution in distributions:
    parameters = [k + '=' + str(v) for k, v in distribution.kwds.items()]
    name = "{name}({parameters})".format(
        name=distribution.dist.name,
        parameters=', '.join(parameters)
    )
import pandas as pd
import numpy as np
from scipy.stats.distributions import gamma

ICL_STD = gamma(1 / 0.45**2, 0, 18.8 * 0.45**2)
ICL_ITS = gamma(1 / 0.86**2, 0, 5.1 * 0.86**2)
DEATHS_DAYS_S = np.array(
    [ICL_STD.cdf(a + 1) - ICL_STD.cdf(a) for a in range(100)])
S_DAYS = np.array([ICL_ITS.cdf(a + 1) - ICL_ITS.cdf(a) for a in range(60)])
DEATHS_DAYS = np.convolve(DEATHS_DAYS_S, S_DAYS)


def normalize_jh_data(jh, name):
    jh['Country/Region'] = jh[['Country/Region', 'Province/State']].replace(
        np.nan, '').agg(' - '.join, axis=1).str.strip('- ')
    jh = jh.drop(columns=['Province/State', 'Lat', 'Long'])
    jh = jh.melt(id_vars=['Country/Region'], value_name=name)
    jh['variable'] = pd.to_datetime(jh['variable'])
    return jh


def get_jh_data():
    jhcc = pd.read_csv(
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
    )
import numpy as np
import random
import matplotlib.pyplot as plt
import copy
import os
import os.path
import optparse
import scipy
from optparse import OptionParser
from scipy.stats import distributions

normal1 = distributions.norm(3, 1.1)
normal2 = distributions.norm(7, 1)
uniform = distributions.uniform(0, 10)
gamma = distributions.gamma(6, 0.01)

x = np.linspace(0, 10, 100)
y1 = normal1.pdf(x) + normal2.pdf(x)
y2 = uniform.pdf(x)

plt.plot(x, y2)
plt.fill_between(x, 0, y2)
plt.title('Prior Distribution')
plt.ylim([0, 0.5])
plt.show()

plt.plot(x, y1 * y2)
#plt.plot(x, gamma.pdf(x))
plt.fill_between(x, 0, y1 * y2)
plt.title('Posterior Distribution')
plt.show()
from sklearn.utils.validation import check_is_fitted, check_X_y, check_array
from sklearn.utils import check_random_state

from .utils import atleast_list, issequence
from .mathfun.special import logsumexp
from .basis_functions import LinearBasis, apply_grad
from .likelihoods import Gaussian
from .optimize import sgd, structured_sgd, logtrick_sgd, Adam
from .btypes import Bound, Positive, Parameter

# Set up logging
log = logging.getLogger(__name__)

# Module settings
WGTRND = norm()  # Sampling distribution over mixture weights
COVRND = gamma(a=2, scale=0.5)  # Sampling distribution over mixture covariance
LOGITER = 500  # Number of SGD iterations between logging ELBO and hypers


class GeneralizedLinearModel(BaseEstimator, RegressorMixin):
    r"""
    Bayesian Generalized linear model (GLM).

    This provides a scikit learn compatible interface for the glm module.

    Parameters
    ----------
    likelihood : Object
        A likelihood object, see the likelihoods module.
    basis : Basis
        A basis object, see the basis_functions module.
# The number of allowable model runs
n_samples = 500

# scipy.stats.distributions objects for each distribution, per Table 1 in the
# paper.  Note that for truncated normal, the bounds are relative to the mean
# in units of scale, so if we want a positive distribution for a normal with
# mean 8 and sigma 4, then the lower bound is -8/4 = -2.
distributions = {
    "GCM": randint(0, 4),
    "FICE": truncnorm(-4 / 4.0, 4.0 / 4, loc=8, scale=4),
    "FSNOW": truncnorm(-4.1 / 3, 4.1 / 3, loc=4.1, scale=1.5),
    "PRS": uniform(loc=5, scale=2),
    "RFR": truncnorm(-0.4 / 0.3, 0.4 / 0.3, loc=0.5, scale=0.2),
    "OCM": randint(-1, 2),
    "OCS": randint(-1, 2),
    "TCT": randint(-1, 2),
    "VCM": truncnorm(-0.35 / 0.2, 0.35 / 0.2, loc=1, scale=0.2),
    "PPQ": truncnorm(-0.35 / 0.2, 0.35 / 0.2, loc=0.6, scale=0.2),
    "SIAE": gamma(1.5, scale=0.8, loc=1),
}

# Names of all the variables
keys = ["GCM", "FICE", "FSNOW", "PRS", "RFR", "OCM",
        "OCS", "TCT", "VCM", "PPQ", "SIAE"]

# Generate the latin hypercube samples with uniform distributions
unif_sample = lhs(len(keys), n_samples)

# To hold the transformed variables
dist_sample = np.zeros_like(unif_sample)

# For each variable, transform with the inverse of the CDF (inv(CDF) = ppf)
for i, key in enumerate(keys):
    dist_sample[:, i] = distributions[key].ppf(unif_sample[:, i])
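# Hedged illustration (not from the original script) of the truncnorm
# convention noted above: the a/b bounds are given in units of `scale` relative
# to `loc`, so a normal with mean 8 and sigma 4 truncated at zero uses a lower
# bound of -8/4 = -2.  The upper bound of 2 here is arbitrary.
from scipy.stats import truncnorm

example = truncnorm(-8 / 4.0, 2, loc=8, scale=4)
print(example.ppf(0.0), example.ppf(1.0))  # support runs from 0.0 to 16.0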
'''
Created on Jun 17, 2014

@author: rch
'''

from scipy.stats.distributions import gamma
import numpy as np

x = np.linspace(0, 0.5, 4000)
shapes = np.array([0.18, 0.15, 0.23, 0.15, 0.20, 0.18], dtype='f')
scales = np.array([0.8, 0.7, 0.6, 1.0, 0.7, 0.8], dtype='f')
locs = np.array([0.0055, 0.0080, 0.0010, 0.0050, 0.0090, 0.0057], dtype='f')

import pylab as p

for i in range(0, len(shapes)):
    g = gamma(shapes[i], scale=scales[i], loc=locs[i])
    pdf = g.pdf(x)
    p.plot(x, pdf)
p.show()