Esempio n. 1
0
def Cauchy_Entropy(w0, l0, w1, l1):
    """Estimate ``mean(-rho * log(rho))`` for two Cauchy-approximated arms.

    Each arm is described by a win count ``w`` and a loss count ``l``. It is
    approximated by a Cauchy distribution located at ``w / (w + l)`` (0.5 when
    both counts are zero) whose scale is the standard deviation of a
    Beta(w + 1, l + 1) distribution.

    ``rho = pdf0 * cdf1 + pdf1 * cdf0`` is evaluated on ``MC_samples`` points
    drawn from the module-level sequence generator ``seq`` (which is reset
    first), and the sample mean of ``-rho * log(rho)`` is returned.
    """
    global MC_samples

    # Locations: mode of the Beta distribution, or 0.5 without observations.
    x0 = 0.5 if (w0 == 0 and l0 == 0) else w0/(w0 + l0)
    x1 = 0.5 if (w1 == 0 and l1 == 0) else w1/(w1 + l1)

    # Scales: standard deviation of Beta(w + 1, l + 1).
    sig0 = np.sqrt((w0+1)*(l0+1) / ((w0+l0+2)**2 * (w0+l0+3)))
    sig1 = np.sqrt((w1+1)*(l1+1) / ((w1+l1+2)**2 * (w1+l1+3)))

    # Restart the sequence so every call sees the same sample points.
    seq.reset()
    points = np.reshape(seq.get(MC_samples), MC_samples)

    density0 = cauchy.pdf(points, x0, sig0)
    density1 = cauchy.pdf(points, x1, sig1)
    cumulative0 = cauchy.cdf(points, x0, sig0)
    cumulative1 = cauchy.cdf(points, x1, sig1)

    rho = density0 * cumulative1 + density1 * cumulative0
    return np.mean(-rho * np.log(rho))
Esempio n. 2
0
def fit_func(df, col_num, title='Plot', xlims=None, ylims=None, no_bins=100):
    """Plot a normalised histogram of one column with Cauchy and Gaussian fits.

    Args:
        df: pandas DataFrame holding the data.
        col_num: positional index of the column to analyse.
        title: figure title; with whitespace removed it is also used as the
            name of the PNG file that is written.
        xlims: optional ``(low, high)`` window; defaults to the data range.
        ylims: optional y-axis limits passed to ``plt.ylim``.
        no_bins: number of histogram bins.

    The figure is saved to ``<title without whitespace>.png``.
    """
    plt.figure()
    #plt.subplot(211)
    plt.title(title)

    # Grab the desired column while removing np.nan values
    x = df.loc[~np.isnan(df.iloc[:, col_num]), :].iloc[:, col_num]

    if not xlims:
        xlims = (min(x), max(x))
    if ylims:
        plt.ylim(ylims)

    # Inclusive bounds: with the default window (the data range) strict
    # comparisons would silently discard the smallest and largest samples.
    x = x[(x >= xlims[0]) & (x <= xlims[1])]

    # ``density=True`` replaces the ``normed`` flag removed from Matplotlib.
    plt.hist(x, no_bins, density=True)

    ### Cauchy Fit
    xdom = np.linspace(xlims[0], xlims[1], 1000)
    loc, scale = cauchy.fit(x)
    plt.plot(xdom, cauchy.pdf(xdom, loc, scale), label='Cauchy')

    ### Gaussian Fit
    mu, sigma = norm.fit(x)
    plt.plot(xdom, norm.pdf(xdom, mu, sigma), label='Gaussian')
    plt.legend()

    #plt.subplot(212)
    #plot_DRR_TS(df)
    out_name = "".join(title.split()) + '.png'
    plt.savefig(out_name, format='png')
def bayes_factor(stan_model, simulation_index, day_index, kpi):
    """
    Fit the Stan model on one simulation snapshot and derive stop criteria.

    Args:
        stan_model (pystan.model.StanModel): precompiled Stan model object
        simulation_index (int): random seed used for the simulation
        day_index (int): time step of the peeking
        kpi (str): KPI name

    Returns:
        tuple: (simulation_index, day_index, bf_01, significant_and_stop_bf,
        hdi_width, stop_bp, mean_delta, lower, upper,
        significant_based_on_interval), where bf_01 is the Bayes factor
        based on the Savage-Dickey density ratio
    """
    print("simulation:" + str(simulation_index) + ", day:" + str(day_index))
    snapshot = get_snapshot(readSimulationData(simulation_index),
                            day_index + 1)

    _, traces = fit_stan(stan_model, snapshot, kpi)
    posterior_kde = gaussian_kde(traces['delta'])
    interval = HDI_from_MCMC(traces['delta'])
    lower = interval[0]
    upper = interval[1]

    # Savage-Dickey: posterior density at 0 over prior (Cauchy(0, 1)) density at 0.
    prior_at_zero = cauchy.pdf(0, loc=0, scale=1)
    bf_01 = posterior_kde.evaluate(0)[0] / prior_at_zero

    hdi_width = upper - lower
    mean_delta = np.mean(traces['delta'])

    bf_says_stop = bf_01 < 1 / 3.
    precision_says_stop = hdi_width < 0.08
    # The interval excludes zero when it lies entirely on one side of it.
    zero_outside_interval = lower > 0 or upper < 0

    return (simulation_index, day_index, bf_01, bf_says_stop,
            hdi_width, precision_says_stop, mean_delta, lower, upper,
            zero_outside_interval)
Esempio n. 4
0
def bayes_factor(x, y, distribution='normal'):
    """
    Run Bayesian sampling on two groups and decide on early stopping.

    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        distribution: name of the KPI distribution model, which assumes a
            Stan model file with the same name exists

    Returns:
        tuple:
            - stop label (1 when the Bayes factor is above 3 or below 1/3)
            - effect size (delta)
            - credible interval of delta (dict with 'lower' and 'upper')
            - sample size of x
            - sample size of y
            - absolute mean of x
            - absolute mean of y
    """
    traces, n_x, n_y, mu_x, mu_y = _bayes_sampling(
        x, y, distribution=distribution)
    delta_trace = traces['delta']

    # BF_01: posterior density at 0 divided by the Cauchy(0, 1) prior density.
    prior_at_zero = cauchy.pdf(0, loc=0, scale=1)
    bf = gaussian_kde(delta_trace).evaluate(0)[0] / prior_at_zero
    stop = int(bf > 3 or bf < 1 / 3.)

    interval = HDI_from_MCMC(traces['delta'])
    print(bf, interval)

    bounds = {'lower': interval[0], 'upper': interval[1]}
    return stop, mu_x - mu_y, bounds, n_x, n_y, mu_x, mu_y
Esempio n. 5
0
def cauchy_distribution(select_size, asked=rvs, x=0):
    """Dispatch to a standard Cauchy sampler, density or distribution function.

    ``asked`` is compared against the module-level markers ``rvs``, ``pdf``
    and ``cdf``: ``rvs`` draws ``select_size`` samples, ``pdf`` / ``cdf``
    evaluate the density / cumulative distribution at ``x``. Any other value
    yields ``None``.
    """
    if asked == rvs:
        return cauchy.rvs(size=select_size)
    if asked == pdf:
        return cauchy.pdf(x)
    if asked == cdf:
        return cauchy.cdf(x)
    return None
Esempio n. 6
0
def lorentzian(x_0, x_i, y_i, fwhm):
    """Evaluate a Lorentzian (Cauchy) line shape of full width at half maximum ``fwhm``.

    The standard Cauchy density is evaluated at ``(x_0 - x_i) / (fwhm / 2)``
    and multiplied by ``y_i``. If ``fwhm`` is not a scalar, its first element
    is used.
    """
    width = fwhm if np.isscalar(fwhm) else fwhm[0]
    half_width = width/2
    return cauchy.pdf((x_0-x_i)/half_width) * y_i
Esempio n. 7
0
def bayes_factor(x,
                 y,
                 distribution='normal',
                 num_iters=25000,
                 inference='sampling'):
    """
    Compute the Bayes factor stopping decision and summary statistics.

    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        distribution: name of the KPI distribution model, which assumes a
            Stan model file with the same name exists
        num_iters: number of iterations of bayes sampling
        inference: sampling or variational inference method for
            approximating the posterior

    Returns:
        dictionary with statistics
    """
    traces, n_x, n_y, mu_x, mu_y = _bayes_sampling(
        x, y, distribution=distribution, num_iters=num_iters,
        inference=inference)
    normalized_trace = get_trace_normalized_effect_size(distribution, traces)
    absolute_trace = traces['delta']

    # BF_01: posterior density at 0 over the Cauchy(0, 1) prior density at 0.
    posterior_kde = gaussian_kde(normalized_trace)
    prior_at_zero = cauchy.pdf(0, loc=0, scale=1)
    bf = posterior_kde.evaluate(0)[0] / prior_at_zero
    stop = bf > 3 or bf < 1 / 3.

    credible_mass = 0.95  # another magic number
    tail_mass = 1.0 - credible_mass
    lower_p = round(tail_mass / 2.0, 5)
    upper_p = round(1.0 - tail_mass / 2.0, 5)
    credible_interval = HDI_from_MCMC(absolute_trace, credible_mass)

    interval_entries = [
        {'percentile': p * 100, 'value': v}
        for p, v in zip([lower_p, upper_p], credible_interval)
    ]

    return {
        'stop': bool(stop),
        'delta': float(mu_x - mu_y),
        'confidence_interval': interval_entries,
        'treatment_sample_size': int(n_x),
        'control_sample_size': int(n_y),
        'treatment_mean': float(mu_x),
        'control_mean': float(mu_y),
        'number_of_iterations': num_iters
    }
Esempio n. 8
0
def cauchyFunc():
    """Show, for every sample size in ``size``, the Cauchy density over a sample histogram.

    For each ``n`` a new figure is created with the standard Cauchy density
    between its 1% and 99% quantiles and a density-normalised histogram of
    ``n`` random draws, and the figure is displayed.
    """
    for n in size:
        fig, ax = plt.subplots(1, 1)
        ax.set_title("Распределение Коши, n = " + str(n))
        grid = np.linspace(cauchy.ppf(0.01), cauchy.ppf(0.99), 100)
        ax.plot(grid, cauchy.pdf(grid), 'b-', lw=5, alpha=0.6)
        draws = cauchy.rvs(size=n)
        ax.hist(draws, density=True, histtype='stepfilled', alpha=0.2)
        plt.show()
def cauchy(x, a, b, c):
    """Scaled Cauchy (Lorentz) probability density function.

    Parameters:
        x: independent variable
        a: multiplicative amplitude applied to the density
        b: location of the distribution
        c: scale of the distribution
    Returns:
        y: ``a`` times the Cauchy density with location ``b`` and scale ``c``
           evaluated at ``x``
    """
    return a * sci_cauchy.pdf(x, loc=b, scale=c)
Esempio n. 10
0
 def calc_particles_weight(self,y):
     """Weight every particle by the Cauchy likelihood of the observation ``y``.

     The predicted particles from ``self.calc_pred_particles()`` are stored in
     ``self.predicted_particles``. The residual between ``y`` and the projected
     prediction ``self.H.dot(locs)`` is scored with a zero-centred Cauchy
     density whose scale is ``10 ** locs[-1]`` (the last row of the
     prediction). The flattened densities are stored in ``self.weights``.
     """
     locs = self.calc_pred_particles()
     self.predicted_particles = locs
     scale=np.power(10,locs[-1])
     # A scale of exactly zero is invalid for the density; use a tiny positive value.
     scale[scale==0] = 1e-308

     # Needs to be revised for e.g. the multivariate case.
     residuals = np.array([y]*self.PARTICLES_NUM) - self.H.dot(locs)
     # ``pdf`` only accepts ``loc`` and ``scale``; the former ``size=`` keyword
     # (valid for ``rvs`` only) raises a TypeError on current SciPy.
     self.weights = cauchy.pdf(residuals, loc=[0]*self.PARTICLES_NUM,
                               scale=scale).flatten()
Esempio n. 11
0
    def on_epoch_end(self, epoch, logs={}):
        """Update the contamination-model parameters at the end of an epoch.

        The parameters updated are: 'a' for the global weight of the
        membership to the normal distribution, 'sigmaSQ' for the variance of
        the normal distribution and 'gammaSQ' for the scale of the Cauchy
        distribution of outliers. The latent variables are updated as well:
        'T_k' describing in the first column the probability of membership to
        the normal distribution and in the second column the probability of
        membership to the Cauchy distribution, i.e. outlier. The evolution of
        the global parameters (a, sigmaSQ and gammaSQ) is stored.

        Parameters
        ----------
        epoch : integer
            Current epoch in training.
        logs : keras logs
            Metrics stored during current keras training.
        """
        # Residuals of the current model on the stored data (self.x, self.y).
        y_pred = self.model.predict(self.x)
        error = self.y.squeeze() - y_pred.squeeze()

        # Update parameters (M-Step)
        errorSQ = error**2
        # New weight of the normal component: mean membership, capped at a_max.
        aux = np.mean(self.T[:, 0])
        if aux > self.a_max:
            aux = self.a_max
        K.set_value(self.a, aux)
        # Membership-weighted mean of the squared errors.
        K.set_value(self.sigmaSQ,
                    np.sum(self.T[:, 0] * errorSQ) / np.sum(self.T[:, 0]))
        # Gradient descent
        # grad_gmSQ appears to be the derivative w.r.t. gammaSQ of the
        # membership-weighted negative Cauchy log-likelihood.
        gmSQ_eval = K.get_value(self.gammaSQ)
        grad_gmSQ = (0.5 * np.sum(self.T[:, 1]) -
                     np.sum(self.T[:, 1] * errorSQ /
                            (gmSQ_eval + errorSQ))) / gmSQ_eval
        # Guarantee positivity in update
        # The step size starts at the optimizer's learning rate and is halved
        # until the new value is non-negative and at most 1000x the old one.
        eta = K.get_value(self.model.optimizer.lr)
        new_gmSQ = gmSQ_eval - eta * grad_gmSQ
        while new_gmSQ < 0 or (new_gmSQ / gmSQ_eval) > 1000:
            eta /= 2
            new_gmSQ = gmSQ_eval - eta * grad_gmSQ
        K.set_value(self.gammaSQ, new_gmSQ)

        # Update conditional distribution of latent variables (beginning of E-Step)
        a_eval = K.get_value(self.a)
        sigmaSQ_eval = K.get_value(self.sigmaSQ)
        gammaSQ_eval = K.get_value(self.gammaSQ)
        print('a: %f, sigmaSQ: %f, gammaSQ: %f' %
              (a_eval, sigmaSQ_eval, gammaSQ_eval))
        # Component densities of each residual under the updated parameters.
        norm_eval = norm.pdf(error, loc=0, scale=np.sqrt(sigmaSQ_eval))
        cauchy_eval = cauchy.pdf(error, loc=0, scale=np.sqrt(gammaSQ_eval))
        # Mixture density; the memberships are the normalised component shares.
        denominator = a_eval * norm_eval + (1. - a_eval) * cauchy_eval
        self.T[:, 0] = a_eval * norm_eval / denominator
        self.T[:, 1] = (1. - a_eval) * cauchy_eval / denominator
        K.set_value(self.T_k, self.T)

        # store evolution of global variables
        self.avalues.append(a_eval)
        self.sigmaSQvalues.append(sigmaSQ_eval)
        self.gammaSQvalues.append(gammaSQ_eval)
def bayes_factor(x, y, distribution='normal', num_iters=25000, inference='sampling'):
    """ Bayes factor computation.

    :param x: sample of a treatment group
    :type  x: pd.Series or list (array-like)
    :param y: sample of a control group
    :type  y: pd.Series or list (array-like)
    :param distribution: name of the KPI distribution model, which assumes a Stan model file with the same name exists
    :type  distribution: str
    :param num_iters: number of iterations of bayes sampling
    :type  num_iters: int
    :param inference: sampling or variational inference method for approximation the posterior
    :type  inference: str

    :return: results of type EarlyStoppingTestStatistics (without p-value and stat. power)
    :rtype:  EarlyStoppingTestStatistics
    """

    # The placeholders read: procedure, treatment size, control size,
    # distribution -- the arguments must be passed in that same order.
    logger.info("Started running bayes factor with {} procedure, treatment group of size {}, "
                "control group of size {}, {} distribution.".format(inference, len(x), len(y), distribution))

    traces, n_x, n_y, mu_x, mu_y = _bayes_sampling(x, y, distribution=distribution, num_iters=num_iters,
                                                   inference=inference)
    trace_normalized_effect_size = get_trace_normalized_effect_size(distribution, traces)
    trace_absolute_effect_size = traces['delta']

    kde = gaussian_kde(trace_normalized_effect_size)
    prior = cauchy.pdf(0, loc=0, scale=1)
    # BF_01: posterior density at 0 divided by the prior density at 0
    bf = kde.evaluate(0)[0] / prior
    stop = bf > 3 or bf < 1 / 3.

    credible_mass = 0.95
    left_out      = 1.0 - credible_mass
    p1            = round(left_out/2.0, 5)
    p2            = round(1.0 - left_out/2.0, 5)
    credible_interval = HDI_from_MCMC(trace_absolute_effect_size, credible_mass)

    treatment_statistics = SampleStatistics(int(n_x), float(mu_x), float(np.nanvar(x)))
    control_statistics   = SampleStatistics(int(n_y), float(mu_y), float(np.nanvar(y)))
    variant_statistics   = BaseTestStatistics(control_statistics, treatment_statistics)

    logger.info("Finished running bayes factor with {} procedure, treatment group of size {}, "
                "control group of size {}, {} distribution.".format(inference, len(x), len(y), distribution))

    # Map each percentile (in percent) to its credible-interval bound.
    interval_by_percentile = {p*100: v for p, v in zip([p1, p2], credible_interval)}

    return EarlyStoppingTestStatistics(variant_statistics.control_statistics,
                                       variant_statistics.treatment_statistics,
                                       float(mu_x - mu_y),
                                       interval_by_percentile,
                                       None, None, stop)
Esempio n. 13
0
    def calc_particles_weight(self, y):
        """Weight every particle by the Cauchy likelihood of the observation ``y``.

        The predicted particles from ``self.calc_pred_particles()`` are stored
        in ``self.predicted_particles``. The residual between ``y`` and the
        projected prediction ``self.H.dot(locs)`` is scored with a
        zero-centred Cauchy density whose scale is ``10 ** locs[-1]`` (the
        last row of the prediction). The flattened densities are stored in
        ``self.weights``.
        """
        locs = self.calc_pred_particles()
        self.predicted_particles = locs
        scale = np.power(10, locs[-1])
        # A scale of exactly zero is invalid for the density; use a tiny
        # positive value instead.
        scale[scale == 0] = 1e-308

        # Needs to be revised for e.g. the multivariate case.
        residuals = np.array([y] * self.PARTICLES_NUM) - self.H.dot(locs)
        # ``pdf`` only accepts ``loc`` and ``scale``; the former ``size=``
        # keyword (valid for ``rvs`` only) raises a TypeError on current SciPy.
        self.weights = cauchy.pdf(residuals,
                                  loc=[0] * self.PARTICLES_NUM,
                                  scale=scale).flatten()
Esempio n. 14
0
def svf_from_cav_res(date):
    """Using a scan of the cavity resonance, produce Survival vs. Field.

    Reads ``../<date>/1_fscan.txt`` through ``mw_fscan`` (drawn on the upper
    axes), converts frequency to field with a Lorentzian centred on the fitted
    resonance, and plots data, fit and residual against field on the lower
    axes.
    """
    fig, axes = plt.subplots(nrows=2, sharex=False)
    top_ax, bottom_ax = axes[0], axes[1]

    attn = -16  # Var Attn value
    fmax = db_feq(attn)
    scan_path = os.path.join("..", date, "1_fscan.txt")
    data, popt = mw_fscan(scan_path, -1, top_ax)

    fwhm = popt[2] * 2
    loc = popt[1]
    freqs = data['f'].values
    # Lorentzian normalised to 1 at the resonance, scaled to the peak field.
    field = fmax * cauchy.pdf((freqs - loc) / fwhm * 2) / cauchy.pdf(0)

    survival_fit = cauchy_model(data['f'].values, *popt) / popt[3]
    survival_data = data['nrm'] / popt[3]

    bottom_ax.plot(field, survival_data, '.', label="data")
    bottom_ax.plot(field, survival_fit, '-', lw=3, label="Fit")
    bottom_ax.plot(field, survival_data - survival_fit, '.', label="error")
    bottom_ax.axvline(0, c='k')
    for level in (1, 0.9, 0.5, 0):
        bottom_ax.axhline(level, c='k')
    bottom_ax.legend()
    fig.tight_layout()
    return
Esempio n. 15
0
def lorentzian(E, Ec, nw):
    """
    Lorentzian profile, evaluated as a Cauchy probability density.

    Parameters:
        E:      energy
        Ec:     center energy
        nw:     natural width (FWHM)

    Return (lorentz)
        lorentz:  Cauchy density at E with location Ec and scale
                  fwhm2gamma(nw)
    """
    half_width = fwhm2gamma(nw)
    lorentz = cauchy.pdf(E, loc=Ec, scale=half_width)
    return lorentz
Esempio n. 16
0
def Cauchy_P(outcome, w, l):
    """Monte Carlo evidence of a Bernoulli ``outcome`` under a Cauchy prior.

    The prior on the success probability is a Cauchy density located at
    ``w / (w + l)`` (0.5 when both counts are zero) with scale equal to the
    standard deviation of Beta(w + 1, l + 1). The product of the Bernoulli
    likelihood and the prior is averaged over ``MC_samples`` points taken from
    the module-level sequence generator ``seq``, which is reset first.
    """
    global MC_samples

    center = 0.5 if (w == 0 and l == 0) else w/(w + l)
    spread = np.sqrt((w+1)*(l+1) / ((w+l+2)**2 * (w+l+3)))

    # Restart the sequence so every call sees the same sample points.
    seq.reset()
    p = np.reshape(seq.get(MC_samples), MC_samples)

    bernoulli = p**outcome * (1-p)**(1-outcome)
    weight = cauchy.pdf(p, center, spread)
    return np.mean(bernoulli * weight)
Esempio n. 17
0
def cauchy_numbers():
    """Plot standard Cauchy samples against the density for each size in ``units``.

    One subplot is drawn per entry of ``units``: a density histogram of that
    many random draws with the standard Cauchy density over it, on a range
    covering at least [-5, 5] and all samples. The figure is written to
    ``cauchyNumbers.pdf`` and shown.
    """
    bins_per_plot = [5, 12, 20]
    lowest_default = -5
    highest_default = 5
    fig, axs = plt.subplots(len(units))
    for idx, n_samples in enumerate(units):
        draws = cauchy.rvs(scale=1, loc=0, size=n_samples)
        low = min(lowest_default, min(draws))
        high = max(highest_default, max(draws))
        panel = axs[idx]
        panel.grid()
        sns.histplot(draws, stat="density", bins=bins_per_plot[idx], color='sandybrown', ax=panel)
        grid = np.linspace(low, high, 1000)
        panel.plot(grid, cauchy.pdf(grid), 'k', lw=2)
        panel.set_xlabel("cauchyNumbers (" + str(n_samples) + " samples)")
    fig.subplots_adjust(hspace=0.75)
    fig.savefig("cauchyNumbers.pdf")
    fig.show()
Esempio n. 18
0
def bayes_factor(x, y, distribution='normal', num_iters=25000):
    """
    Compute the Bayes factor stopping decision and summary statistics.

    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        distribution: name of the KPI distribution model, which assumes a
            Stan model file with the same name exists
        num_iters: number of iterations of bayes sampling

    Returns:
        dictionary with statistics
    """
    traces, n_x, n_y, mu_x, mu_y = _bayes_sampling(
        x, y, distribution=distribution, num_iters=num_iters)
    delta_trace = traces['delta']

    # BF_01: posterior density at 0 over the Cauchy(0, 1) prior density at 0.
    prior_at_zero = cauchy.pdf(0, loc=0, scale=1)
    bf = gaussian_kde(delta_trace).evaluate(0)[0] / prior_at_zero
    stop = bf > 3 or bf < 1 / 3.

    credible_mass = 0.95  # another magic number
    tail_mass = 1.0 - credible_mass
    lower_p = round(tail_mass / 2.0, 5)
    upper_p = round(1.0 - tail_mass / 2.0, 5)
    interval = HDI_from_MCMC(traces['delta'], credible_mass)

    # Interval bounds keyed by their percentile, expressed in percent.
    bounds_by_percentile = {
        lower_p * 100: interval[0],
        upper_p * 100: interval[1]
    }

    return {
        'stop': bool(stop),
        'delta': float(mu_x - mu_y),
        'interval': bounds_by_percentile,
        'n_x': int(n_x),
        'n_y': int(n_y),
        'mu_x': float(mu_x),
        'mu_y': float(mu_y),
        'num_iters': num_iters
    }
Esempio n. 19
0
def bayes_factor(dataset, stan_model, kpi, day_index):
    """Fit the Stan model on data up to ``day_index`` and derive stop criteria.

    Returns:
        tuple: (day_index, bf_01, significant_and_stop_bf, hdi_width, stop_bp,
        mean_delta, lower, upper, significant_based_on_interval)
    """
    print("day", day_index)
    snapshot = data_before_time(dataset, day_index + 1)

    _, traces = fit_stan(stan_model, snapshot, kpi)
    posterior_kde = gaussian_kde(traces['delta'])
    interval = HDI_from_MCMC(traces['delta'])
    lower = interval[0]
    upper = interval[1]

    # Posterior density at 0 divided by the Cauchy(0, 1) prior density at 0.
    prior_at_zero = cauchy.pdf(0, loc=0, scale=1)
    bf_01 = posterior_kde.evaluate(0)[0] / prior_at_zero

    hdi_width = upper - lower
    mean_delta = np.mean(traces['delta'])

    bf_says_stop = bf_01 < 1 / 3.
    precision_says_stop = hdi_width < 0.08
    # The interval excludes zero when it lies entirely on one side of it.
    zero_outside_interval = lower > 0 or upper < 0

    return (day_index, bf_01, bf_says_stop, hdi_width, precision_says_stop,
            mean_delta, lower, upper, zero_outside_interval)
Esempio n. 20
0
def my_kde(frequencies,width,intensities,lineshape, gridsize=100, normalized=True, bounds=None):
    """Broaden a stick spectrum into a smooth curve on a regular grid.

    Args:
        frequencies: line positions.
        width: scale of every line profile (standard deviation for
            "gaussian", Cauchy scale for "lorenzian").
        intensities: weight of each line, indexed like ``frequencies``.
        lineshape: "gaussian" or "lorenzian"; any other value adds no lines.
        gridsize: number of grid points.
        normalized: when true, divide the curve by its Simpson-rule integral.
        bounds: optional ``(xmin, xmax)``; defaults to the frequency range
            extended by three widths on each side.

    Returns:
        ``(x, kde)``: the grid and the summed line profiles on it.
    """
    # boundaries
    if bounds:
        xmin, xmax = bounds
    else:
        xmin = min(frequencies) - 3 * width
        xmax = max(frequencies) + 3 * width

    # grid points
    x = np.linspace(xmin, xmax, gridsize)

    # pick the line profile; unknown names leave the curve at zero
    if lineshape == "gaussian":
        profile = norm.pdf
    elif lineshape == "lorenzian":
        profile = cauchy.pdf
    else:
        profile = None

    # compute kde
    kde = np.zeros(gridsize)
    if profile is not None:
        for idx, center in enumerate(frequencies):
            kde += profile(x, loc=center, scale=width)*intensities[idx]

    # normalize the KDE
    if normalized:
        # ``simps`` was removed from recent SciPy releases in favour of
        # ``simpson``; fall back to the old name only when the new one is
        # missing.
        simpson = getattr(sp.integrate, "simpson", None)
        if simpson is not None:
            area = simpson(kde, x=x)
        else:
            area = sp.integrate.simps(kde, x)
        kde /= area
    return x, kde
def bayes_factor(stan_model, all_data, testDay, kpi, n_iter):
    """
    Fit the Stan model on the data observed up to ``testDay``.

    Args:
        stan_model (pystan.model.StanModel): precompiled Stan model object
        all_data: data frame with at least the columns ``time``, ``entity``
            and ``variant``
        testDay (int): last time step included in the snapshot
        kpi (str): KPI name
        n_iter: iteration count forwarded to ``fit_stan``

    Returns:
        tuple: (bf_01, hdi_width, mean_delta), where bf_01 is the Bayes
        factor based on the Savage-Dickey density ratio
    """
    # Keep rows up to and including testDay, averaged per entity and variant.
    up_to_day = all_data[all_data.time < testDay+1]
    per_entity = up_to_day.groupby(['entity', 'variant']).mean().reset_index()

    _, traces = fit_stan(stan_model, per_entity, kpi, n_iter)
    posterior_kde = gaussian_kde(traces['delta'])
    interval = HDI_from_MCMC(traces['delta'])
    hdi_width = interval[1] - interval[0]

    prior_at_zero = cauchy.pdf(0, loc=0, scale=1)
    bf_01 = posterior_kde.evaluate(0)[0]/prior_at_zero
    mean_delta = np.mean(traces['delta'])
    return bf_01, hdi_width, mean_delta
Esempio n. 22
0
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import cauchy, norm
import scipy.integrate

# http://aidiary.hatenablog.com/entry/20140830/1409398547

# Convergence test for Monte Carlo integration
# Exercise 3.1

N = 1000                # number of Monte Carlo samples
x = 4                   # location of the normal factor
Inf = float("inf")

# Reference value by numerical integration with scipy.integrate:
# I = int t * N(t; x, 1) * C(t) dt / int N(t; x, 1) * C(t) dt,
# where N is the normal density and C the standard Cauchy density.
h1 = lambda t: t * norm(loc=x).pdf(t) * cauchy.pdf(t)
h2 = lambda t: norm(loc=x).pdf(t) * cauchy.pdf(t)
num = scipy.integrate.quad(h1, -Inf, Inf)[0]
den = scipy.integrate.quad(h2, -Inf, Inf)[0]
I = num / den
print("scipy.integrate:", I)

# (1) Convergence test of Monte Carlo integration sampling from the Cauchy distribution
# Assume the numerator and the denominator use the same samples
theta = cauchy.rvs(size=N)
num = theta * norm(loc=x).pdf(theta)
den = norm(loc=x).pdf(theta)

# Remove the samples for which the denominator is 0
# (num must be filtered first, while den still has its original length)
num = num[den != 0]
den = den[den != 0]
Esempio n. 23
0
# Plot fragment: `ax` is created outside this excerpt.
ax.set_xlim([0.4, 0.68])
ax.set_ylim([0, 9.5])

mean, var, skew, kurt = cauchy.stats(moments='mvsk')

# Grid between the 1% and 99% quantiles of the *standard* Cauchy; only the
# part inside the x-limits set above is visible.
x = np.linspace(cauchy.ppf(0.01), cauchy.ppf(0.99), 1000000)
# y-values and a vector of ones used to draw vertical lines as (c * ons, y).
y = np.linspace(0, 6.6, 100)
ons = np.ones(100)

plt.rc('text', usetex=True)
plt.rc('font', family='serif')
fsize = 18

#2c0032
ax.plot(x, cauchy.pdf(x, loc=0.53432, scale=0.0382), color='#6ca6cd',
        lw=3)  #, label='$\Delta=0.0382, \sigma=0.0079$')
#ax.plot(x, cauchy.pdf(x, loc=0.55334, scale=0.0102 ), color='#8c07ee', lw=3, label='$\Delta=0.0102$')
#ax.plot(x, cauchy.pdf(x, loc=0.54545, scale=0.0024 ), color='#ad1927', lw=3, label='$\Delta=0.0024$')
#ax.plot(x, cauchy.pdf(x, loc=0.59613, scale=0.0988 ), color='#f5ce00', lw=3, label='$\Delta=0.0988$')
#ax.plot(x, cauchy.pdf(x, loc=0.53622, scale=0.0349 ), color='#ff6600', lw=3, label='$\Delta=0.0349$')

# Midpoint of the innermost pair of vertical lines.
midp = 0.5 * (0.488 + 0.580)

# Three nested pairs of vertical lines: solid, dashed and dotted.
ax.plot(0.580 * ons, y, '-', lw=1, color='#474747')
ax.plot(0.488 * ons, y, '-', lw=1, color='#474747')
ax.plot(0.588 * ons, y, '--', lw=1, color='#474747')
ax.plot(0.480 * ons, y, '--', lw=1, color='#474747')
ax.plot(0.596 * ons, y, ':', lw=1, color='#7f7f7f')
ax.plot(0.472 * ons, y, ':', lw=1, color='#7f7f7f')
# x-range spanning the innermost (solid) pair of lines.
x1 = np.linspace(0.488, 0.580, 100)
def fitDist(ys, func, xlabel, varNames, params, plot, x=np.arange(-2, 2,
                                                                  0.01)):
    """Fit ``func`` to the normalised histogram of ``ys`` and store the result.

    Args:
        ys: samples to histogram.
        func: model ``func(x, *parameters)`` passed to ``curve_fit``.
        xlabel: x-axis label of the optional plot.
        varNames: list of parameter names, one per fitted parameter.
        params: dict updated in place with ``varNames[i] -> fitted value``.
        plot: when true, draw the histogram and the fitted curve.
        x: histogram bin edges.

    The fitted values are also printed as an assignment-like line.
    """
    # ``density=True`` replaces the ``normed`` flag removed from NumPy.
    vals, bins = np.histogram(ys, bins=x, density=True)
    # Use the left edge of every bin as its x-position.
    bins = bins[:-1]
    popt, _ = curve_fit(func, bins, vals)
    outputString = ", ".join(
        ["params[\"%s\"]"] * len(popt)) + " = " + ", ".join(["%f"] * len(popt))
    for varName, val in zip(varNames, popt):
        params[varName] = val

    if func == funcHypsec:
        fitLabel = "hypsec fit"
    elif func == funcNorm:
        fitLabel = "normal fit"
    elif func == funcGamma:
        fitLabel = "gamma fit"
    else:
        fitLabel = "distribution fit"
    print("  " + outputString % tuple(varNames + list(popt)))
    if plot:
        import matplotlib.pyplot as plt
        plt.figure()
        plt.title("Curve fit for " + " ".join(varNames), fontsize=14)
        plt.bar(bins,
                vals,
                width=bins[1] - bins[0],
                label='observed distribution')
        plt.plot(bins, func(bins, *popt), 'g', label=fitLabel, linewidth=2.0)
        if func == funcHypsec:
            # Overlay a normal fit so the two shapes can be compared.
            poptNormal, _ = curve_fit(funcNorm, bins, vals)
            plt.plot(bins,
                     funcNorm(bins, *poptNormal),
                     'r',
                     label='normal fit (for comparison)',
                     linewidth=2.0,
                     alpha=0.5)
        plt.ylabel("relative frequency", fontsize=14)
        plt.xlabel(xlabel, fontsize=14)
        plt.legend()
        plt.tight_layout()
Esempio n. 25
0
from scipy.stats import cauchy
# Cauchy density at x=2 for location 3 and scale 3 (positional order: x, loc, scale).
print(cauchy.pdf(2,3,3))
Esempio n. 26
0
#!/usr/bin/python

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import cauchy

# Plot the Cauchy density for several scale parameters and save it as EPS.
fig, ax = plt.subplots(1,1)
mean,var,skew,kurt = cauchy.stats(moments='mvsk')

# Grid between the 1% and 99% quantiles of the standard Cauchy distribution.
x = np.linspace(cauchy.ppf(0.01),cauchy.ppf(0.99),1000)

plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# LaTeX labels are raw strings: sequences such as "\D" and "\p" are invalid
# escapes in ordinary string literals and trigger warnings on recent Pythons.
ax.plot(x, cauchy.pdf(x), color='#2c0032', lw=3, label='Standardized')
ax.plot(x, cauchy.pdf(x,loc=0,scale=0.5), color='#8c07ee', lw=3, label=r'$\Delta$ = 0.5')
#ax.plot(x, cauchy.pdf(x), color='#2f0059', lw=3, label='Standardized')
ax.plot(x, cauchy.pdf(x,loc=0,scale=2.0), color='#ad1927', lw=3, label=r'$\Delta$ = 2.0')
ax.plot(x, cauchy.pdf(x,loc=0,scale=3.0), color='#f5ce00', lw=3, label=r'$\Delta$ = 3.0')

ax.legend(loc='best',frameon=False)
ax.set_xlim([-13.0,13.0])
#ax.set_title('Cauchy PDF')

plt.text(-11,0.5,r'$f(z) = \frac{\Delta}{\pi(z^2 + \Delta^2)}$', fontsize=20)
fig.savefig('cauchy.eps',format='eps',dpi=1000)
plt.show()
Esempio n. 27
0
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import cauchy

# Overlay six Cauchy densities with different locations and scales.
fig, ax = plt.subplots(1,1)
mean,var,skew,kurt = cauchy.stats(moments='mvsk')

# Grid between the 1% and 99% quantiles of the *standard* Cauchy; only the
# part inside the x-limits set below is visible.
x = np.linspace(cauchy.ppf(0.01),cauchy.ppf(0.99),100000)
# y-ranges and a vector of ones used to draw vertical lines as (c*ons, y).
y = np.linspace(0,140,100)
y2 = np.linspace(0,80,100)
ons = np.ones(100)

plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# LaTeX labels are raw strings: "\D" is an invalid escape in an ordinary
# string literal and triggers warnings on recent Pythons.
ax.plot(x, cauchy.pdf(x, loc=0.53432, scale=0.0382 ), color='#2c0032', lw=3, label=r'$\Delta=0.0382$')
ax.plot(x, cauchy.pdf(x, loc=0.55334, scale=0.0102 ), color='#8c07ee', lw=3, label=r'$\Delta=0.0102$')
ax.plot(x, cauchy.pdf(x, loc=0.54545, scale=0.0024 ), color='#ad1927', lw=3, label=r'$\Delta=0.0024$')
ax.plot(x, cauchy.pdf(x, loc=0.59613, scale=0.0988 ), color='#f5ce00', lw=3, label=r'$\Delta=0.0988$')
ax.plot(x, cauchy.pdf(x, loc=0.53622, scale=0.0349 ), color='#ff6600', lw=3, label=r'$\Delta=0.0349$')
ax.plot(x, cauchy.pdf(x, loc=0.5237,  scale=0.0295 ), color='#6ca6cd', lw=3, label=r'$\Delta=0.0295$')
# Dashed vertical reference lines at x = 0.695 and x = 0.494.
ax.plot(0.695*ons,y2,'k--',lw=1)
ax.plot(0.494*ons,y,'k--',lw=1)
ax.legend(loc='best',frameon=False)
ax.set_xlim([0.425,0.75])

#ax.set_title('Cauchy PDF')

#plt.text(-2.0,0.2,'$\Delta=0.53$',fontsize=12)
#plt.text(-0.4,0.27,'$\Delta=0.55$',fontsize=12)
#plt.text(0.5,0.3,'$\Delta=0.54$',fontsize=12)
Esempio n. 28
0
# Revenue per product: 200 for products[1], increasing by 200 for each
# following product.
revenues = Revenue(products)
GE = 200
for j in products[1:]:
    revenues[j] = GE
    GE = GE + 200
del GE

# Booking periods
times = Time(100)

# Distribution used for the probabilities
# NOTE(review): the original comment said "normal distribution", but the
# Cauchy distribution is what is imported and used below.
from scipy.stats import cauchy

# Probabilities: for each product, 5 times a Cauchy density with scale 5,
# located at the first index of the slice that is filled.
probs = Prob(products, times)
probs[1][81:101] = 5*cauchy.pdf(np.arange(81, 101), 81, 5)
probs[2][61:101] = 5*cauchy.pdf(np.arange(61, 101), 61, 5)
probs[3][41:101] = 5*cauchy.pdf(np.arange(41, 101), 41, 5)
probs[4][21:101] = 5*cauchy.pdf(np.arange(21, 101), 21, 5)
probs[5][1:101] = 5*cauchy.pdf(np.arange(1, 101), 1, 5)

# Complementary probabilities, stored under index 0
against_probs = Against_Prob(probs)
probs[0] = against_probs


# In[48]:

from Parameter import Stock_Resource, Max_Stock_Resource, Condition_Storage

n_samples = 20000
# Number of histogram bins: twice the square root of the sample count.
n_bins = int(2 * n_samples**(1 / 2))
dist_center = 1.5


def p(x):
    """Unnormalised Cauchy density centred at ``dist_center`` (no 1/pi factor)."""
    return 1 / (1 + (x - dist_center)**2)


x0 = 1.0  # initial value for Markov chain
vals = rmt.random_walk.sample_random_walk(n_samples, x0, p)

mean_val = np.mean(vals)

# Density histogram of the samples over a window of +-8 around the centre.
plt.hist(vals, bins=n_bins, alpha=0.8, density=True)
window = 8
xlims = (dist_center - window, dist_center + window)

# Normalised Cauchy density with the same centre, for comparison.
xvals = np.linspace(*xlims, 200)
yvals = cauchy.pdf(xvals, dist_center)
plt.plot(xvals, yvals)
plt.xlim(*xlims)
# Full-height vertical line at the sample mean.
plt.axvline(
    mean_val,
    0,
    1,
)

plt.show()
#                    cv=5,
#                    verbose = 1)
#grid.fit(Zarr[:, None]);
#print('Best params:',grid.best_params_)

#kde = KernelDensity(bandwidth=grid.best_params_['bandwidth'], 
# Gaussian kernel density estimate with a fixed bandwidth, fitted on all of
# Zarr (before the plotting filter below is applied).
kde = KernelDensity(bandwidth=1, 
                    kernel='gaussian')
kde.fit(Zarr[:, None])


### Plots
# Remove large values for ease of plotting
Zarr = Zarr[(Zarr < 100) & (Zarr > -100)]
x_d = np.linspace(-100,100,1000)
# Fitted densities; the fit parameters are computed outside this excerpt.
cfit = cauchy.pdf(x_d,loc=loc,scale=sca)
nfit = norm.pdf(x_d,loc=locnorm,scale=scanorm)
tfit = t.pdf(x_d,df=dft,loc=loct,scale=scat)

logprob_kde = kde.score_samples(x_d[:, None])

# Evaluate pdf_cmb point by point, scaled by 1/ncomb.
pdf_cmb_array = []
for x in x_d:
    pdf_cmb_array.append(1/ncomb * pdf_cmb(x))
#    pdf_cmb_array.append(pdf_cmb(x))

pdf_cmb_array = np.array(pdf_cmb_array)

# ``density=True`` replaces the ``normed`` flag removed from Matplotlib.
_ = plt.hist(Zarr,bins=100,density=True,histtype='step')
plt.plot(x_d,cfit,'k-') # Cauchy fit
plt.plot(x_d,nfit,'k--') # Normal fit
                        verbose = 1,
                        n_jobs = -2)
    # NOTE(review): this is the body of a construct whose header, and the
    # start of the `grid` definition, lie outside this excerpt.
    grid.fit(dist_filt[:, None])

    ### KDE representation
    # Gaussian KDE using the bandwidth selected by the grid search.
    kde = KernelDensity(bandwidth=grid.best_params_['bandwidth'], 
                        kernel='gaussian')
    kde.fit(dist_filt[:, None])

    # score_samples returns log-densities; exponentiate to get the density.
    logprob_kde = kde.score_samples(x_d[:, None])
    pdfkde = np.exp(logprob_kde)
    
        
    ### Fit a Cauchy distribution 
    loc,scale = cauchy.fit(dist_filt)
    ncauchy = cauchy.pdf(x_d,loc=loc,scale=scale)
    
    ### Print info and plot
    print(idx,dmin,dmax,np.abs(np.mean(dist)),grid.best_params_['bandwidth'],data['metric'][idx])
    p = ax.plot(x_d,pdfkde)
    axins.plot(wl_vec,f_eps(wl_vec,1))
    
    # Draw the Cauchy fit dashed, in the same colour as the KDE curve.
    if plot_cauchy:
        ax.plot(x_d,ncauchy,linestyle='dashed',color=p[-1].get_color())
    
    # Label the KDE curve at its maximum with the metric name.
    idxM = np.argmax(pdfkde)
    ax.text(x_d[idxM],pdfkde[idxM],data['metric'][idx])
    
    ### Maximum of all of the PDFs
    maxpdf = max(maxpdf,np.max(pdfkde))
    maxpdf = max(maxpdf,np.max(ncauchy))
Esempio n. 32
0
## to control the cauchy is what we think it is haha
# x = np.linspace(cauchy.ppf(0.01), cauchy.ppf(0.99), 1000)
# fig, ax = plt.subplots(1,1)
# ax.plot(x, cauchy.pdf(x, scale=g), label='scipy')
# ax.plot(x, g/(np.pi*(g**2 + x**2)), '--', label='custom')
# plt.legend()
# plt.show()

# sample N x's

# Importance sampling with a Cauchy proposal of scale g (g is defined outside
# this excerpt): mc_runs independent runs of N samples each.
N = 10000
mc_runs = 1000
W_tilde = np.zeros((mc_runs, N))  # unnormalised weights, one row per run
for mc in range(mc_runs):
    x = cauchy.rvs(size=N, scale=g)
    q_pdf = cauchy.pdf(x, scale=g)
    # pi_pdf = norm.pdf(x, loc=0, scale=1)
    # Unnormalised standard normal target (no 1/sqrt(2*pi) factor).
    pi_pdf = np.exp(-x**2 / 2)
    w_tilde = pi_pdf / q_pdf
    # Self-normalised weights; computed but not stored.
    w = w_tilde / w_tilde.sum()
    W_tilde[mc, :] = w_tilde

# Histogram over runs of the mean unnormalised weight of the first 100 samples.
plt.hist(np.mean(W_tilde[:, 0:100], axis=1),
         bins=50,
         density=True,
         alpha=0.6,
         label='N=100')
plt.hist(np.mean(W_tilde[:, 0:1000], axis=1),
         bins=50,
         density=True,
         alpha=.6,
Esempio n. 33
0
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import cauchy

# Plot a narrow Cauchy profile together with three pairs of vertical guide
# lines and one horizontal marker segment.
fig, ax = plt.subplots(1, 1)
mean, var, skew, kurt = cauchy.stats(moments='mvsk')

# Abscissa for the curve: the central 98% of the *standard* Cauchy, sampled
# densely so the narrow peak drawn below is well resolved.
x = np.linspace(cauchy.ppf(0.01), cauchy.ppf(0.99), 1000000)
# Ordinates (0.5 .. 6.5) and a matching vector of ones, used to draw
# vertical lines as (x0 * ons, y) and horizontal lines as (xs, y0 * ons).
y = np.linspace(0.5, 6.5, 100)
ons = np.ones(100)

plt.rc('text', usetex=True)
plt.rc('font', family='serif')

#2c0032
# Raw string: "\D" and "\s" are not valid Python escape sequences, so the
# non-raw form triggers a SyntaxWarning on recent interpreters. The text
# handed to LaTeX is unchanged.
ax.plot(x, cauchy.pdf(x, loc=0.53432, scale=0.0382), color='#6ca6cd', lw=3,
        label=r'$\Delta=0.0382, \sigma=0.0079$')
#ax.plot(x, cauchy.pdf(x, loc=0.55334, scale=0.0102 ), color='#8c07ee', lw=3, label='$\Delta=0.0102$')
#ax.plot(x, cauchy.pdf(x, loc=0.54545, scale=0.0024 ), color='#ad1927', lw=3, label='$\Delta=0.0024$')
#ax.plot(x, cauchy.pdf(x, loc=0.59613, scale=0.0988 ), color='#f5ce00', lw=3, label='$\Delta=0.0988$')
#ax.plot(x, cauchy.pdf(x, loc=0.53622, scale=0.0349 ), color='#ff6600', lw=3, label='$\Delta=0.0349$')

# Vertical guide lines: solid, dashed and dotted pairs, each pair wider
# than the previous one.
ax.plot(0.580 * ons, y, '-', lw=1, color='#474747')
ax.plot(0.488 * ons, y, '-', lw=1, color='#474747')
ax.plot(0.588 * ons, y, '--', lw=1, color='#474747')
ax.plot(0.480 * ons, y, '--', lw=1, color='#474747')
ax.plot(0.596 * ons, y, ':', lw=1, color='#7f7f7f')
ax.plot(0.472 * ons, y, ':', lw=1, color='#7f7f7f')
# Abscissae spanning each pair of guide lines.
x1 = np.linspace(0.488, 0.580, 100)
x2 = np.linspace(0.480, 0.588, 100)
x3 = np.linspace(0.472, 0.596, 100)
# Red horizontal segment at y = 5 between the two solid guide lines.
ax.plot(x1, 5 * ons, '-', color='r')
Esempio n. 34
0
from scipy.stats import cauchy

# Density of a Cauchy distribution with location 3 and scale 3, evaluated at x = 2.
print(cauchy.pdf(2, loc=3, scale=3))
Esempio n. 35
0
# Prior constraints:
# smooth, median(theta) = 0 and P(-1 < theta < 1) = 0.5, i.e. each of
# (-inf,-1), (-1,0), (0,1), (1,inf) has prior probability 0.25.
# NOTE(review): obs_x, obs_var and obs_std are defined earlier in the script
# (outside this excerpt).

# A Gaussian prior which satisfies the above constraints.
prior_var = 2.19
prior_mu = 0
prior_std = math.sqrt(prior_var)
p_range = norm.cdf(1, prior_mu, prior_std) - norm.cdf(-1, prior_mu, prior_std)
assert np.allclose(p_range, 0.5, rtol=1e-2)

# Posterior mean under the Gaussian prior (conjugate closed form).
post_var = 1 / (1 / obs_var + 1 / prior_var)
post_mean = post_var * (prior_mu / prior_var + obs_x / obs_var)

assert np.allclose(post_mean, 3.43, rtol=1e-2)

# A Cauchy prior which satisfies the same constraints.
loc = 0
scale = 1
p_range = cauchy.cdf(1, loc, scale) - cauchy.cdf(-1, loc, scale)
assert np.allclose(p_range, 0.5, rtol=1e-2)

# Posterior mean under the Cauchy prior, by numerical integration.
# The prior must be the Cauchy(loc, scale) density evaluated at theta. The
# previous code evaluated a Cauchy in obs_x with scale obs_std and relied on
# a hand-picked finite bound (5.2) to land near the expected value.
# Integrating the proper posterior over the whole real line needs no tuning.
inf = np.inf


def lik(theta):
    """Gaussian likelihood of the observation given theta."""
    return norm.pdf(obs_x, theta, obs_std)


def prior(theta):
    """Cauchy(loc, scale) prior density at theta."""
    return cauchy.pdf(theta, loc, scale)


def post(theta):
    """Unnormalised posterior density at theta."""
    return lik(theta) * prior(theta)


Z = integrate.quad(post, -inf, inf)[0]  # normalising constant
post_mean = integrate.quad(lambda theta: theta * post(theta) / Z, -inf, inf)[0]
assert np.allclose(post_mean, 4.56, rtol=1e-2)
Esempio n. 36
0
def lorentz(x, loc=0, scale=1, mag=1):
    """Return a Lorentzian (Cauchy pdf) at ``x`` multiplied by ``mag``.

    ``loc`` and ``scale`` are forwarded to ``cauchy.pdf`` as the peak
    position and scale parameter.
    """
    unit_area_profile = cauchy.pdf(x, loc=loc, scale=scale)
    return unit_area_profile * mag
Esempio n. 37
0
def _read_parameter_sets(value_prompt, continue_prompt, n_values):
    """Interactively collect parameter sets until the user answers 0.

    Each round reads one line of ``n_values`` whitespace-separated floats
    (prompted with ``value_prompt``) and then an integer (prompted with
    ``continue_prompt``); entering 0 ends the collection.

    Returns:
        list of tuples of floats, in entry order (at least one entry).

    Raises:
        ValueError: if a line does not contain exactly ``n_values`` numbers
            or an answer cannot be parsed as a number.
    """
    parameter_sets = []
    while True:
        values = tuple(map(float, input(value_prompt).split()))
        if len(values) != n_values:
            raise ValueError(
                "expected {} value(s), got {}".format(n_values, len(values)))
        parameter_sets.append(values)
        if int(input(continue_prompt)) == 0:
            return parameter_sets


def decide_parameters_and_distribution(disname):
    """Prompt for parameters of the named distribution and plot its pdf.

    For every distribution except ``"cauchy"`` the user may enter several
    parameter sets; one curve per set is drawn on the current matplotlib
    figure, then the legend is added and the figure is shown.

    Args:
        disname (str): one of "norm", "expon", "gamma", "beta", "cauchy",
            "log_normal", "pareto" or "wible" (Weibull). Any other value is
            ignored and nothing is plotted.

    Returns:
        None
    """
    # Imported locally: these two distributions are only needed here and the
    # module-level import list is not under this function's control.
    from scipy.stats import pareto, weibull_min

    stop_only = "0:終了する:"
    stop_or_continue = "0:終了する.それ以外の数字:続ける"

    if disname == "norm":
        # NOTE(review): the prompt asks for a variance, but the value is
        # passed to scipy as ``scale`` (a standard deviation) - confirm.
        for mu, s in _read_parameter_sets("m:平均,s:分散:", stop_or_continue, 2):
            X = np.arange(start=mu - 3 * s, stop=mu + 3 * s, step=0.1)
            plt.plot(X, norm.pdf(x=X, loc=mu, scale=s),
                     label="mu={},sigma={}".format(mu, s))

    elif disname == "expon":
        # NOTE(review): lam is passed as ``loc`` (a shift), not as a
        # rate/mean via ``scale`` - kept as is, intent is ambiguous.
        for (lam,) in _read_parameter_sets("lam:平均", stop_only, 1):
            X = np.arange(start=-1, stop=15, step=0.1)
            plt.plot(X, expon.pdf(x=X, loc=lam), label="λ={}".format(lam))

    elif disname == "gamma":
        for k, theta in _read_parameter_sets("k:形状,theta:尺度:", stop_only, 2):
            X = np.arange(start=-1, stop=k * (theta**2), step=0.1)
            plt.plot(X, gamma.pdf(x=X, a=k, scale=theta),
                     label="k={},theta={}".format(k, theta))

    elif disname == "beta":
        for a, b in _read_parameter_sets("a:形状母数,b:形状母数:", stop_only, 2):
            X = np.arange(start=0, stop=1, step=0.01)
            plt.plot(X, beta.pdf(x=X, a=a, b=b),
                     label="a={},b={}".format(a, b))

    elif disname == "cauchy":
        # Standard Cauchy only; the label keeps plt.legend() from being
        # called on an axes without any labelled artist.
        X = np.arange(start=-2, stop=2, step=0.1)
        plt.plot(X, cauchy.pdf(x=X), label="cauchy")

    elif disname == "log_normal":
        # NOTE(review): the "mean" is passed as scipy's shape ``s`` and the
        # "variance" as ``scale`` - kept as is, confirm the intended mapping.
        for mu, s in _read_parameter_sets("m:平均,s:分散:", stop_or_continue, 2):
            X = np.arange(start=0, stop=mu + 3 * s, step=0.1)
            plt.plot(X, lognorm.pdf(x=X, s=mu, scale=s),
                     label="mu={},sigma={}".format(mu, s))

    elif disname == "pareto":
        # Each curve uses its own (a, s); the first value is the Pareto
        # shape ``b`` and the second the scale.
        # NOTE(review): the prompt text still says mean/variance.
        for a, s in _read_parameter_sets("a:平均,s:分散:", stop_or_continue, 2):
            X = np.arange(start=0, stop=a + 3 * s, step=0.1)
            plt.plot(X, pareto.pdf(X, b=a, scale=s),
                     label="mu={},sigma={}".format(a, s))

    elif disname == "wible":
        # Weibull with shape a (scipy's ``c``) and scale b.
        for a, b in _read_parameter_sets("a:形状母数,s:経常母数:", stop_or_continue, 2):
            X = np.arange(start=0, stop=a + 3 * b, step=0.1)
            plt.plot(X, weibull_min.pdf(X, c=a, scale=b),
                     label="mu={},sigma={}".format(a, b))

    else:
        # Unknown names are silently ignored.
        return

    plt.legend()
    plt.show()
Esempio n. 38
0
def cauchy_model(x, a, loc, scale, y0):
    """Cauchy-shaped peak on a constant baseline.

    Evaluates ``a * cauchy.pdf(x, loc, scale) + y0``: ``a`` multiplies the
    Cauchy density and ``y0`` is the additive offset.
    """
    peak = a * cauchy.pdf(x, loc, scale)
    return peak + y0
Esempio n. 39
0
def plot_histogram(filename, 
    column_names=[], skip_cols=[], nbins=10, trimends=False,
    autosave=False, save_directory='', save_format='svg', delimiter=None):
    """
    Plots a histogram formed from the columns of the specified file.

    For every column that is not skipped, a normalised histogram is drawn
    together with a fitted normal and a fitted Cauchy (Lorentz) curve. The
    fitted parameters of all columns are handed to write_statistics().

    If column_names is specified, the titles of the plots will be renamed
    accordingly.  Otherwise "Title" is inserted instead.

    skip_cols specifies any columns in the data that should be skipped.
    Columns at the end of the line may be skipped by using negative numbers.
    In this scheme the last column in a row is -1.  The list passed in is
    not modified.

    nbins is the number of histogram bins.  If trimends is true, every
    occurrence of the column's minimum and maximum is dropped first.

    If autosave is true each figure is written to
    save_directory/stats_hist_<title>.<save_format> and closed; otherwise
    it is shown interactively.

    delimiter is the column separator passed to loadtxt (None or empty
    means whitespace).  True is accepted as ',' because earlier versions
    treated any truthy value as "comma separated".
    """
    if not delimiter:
        separator = None
    elif delimiter is True:
        separator = ','
    else:
        separator = delimiter

    # ndmin=2 keeps single-column / single-row files two-dimensional so the
    # column loop below works for them as well.
    with open(filename, 'r') as infile:
        data = loadtxt(infile, dtype=float, delimiter=separator, ndmin=2)

    end_col = data.shape[1]

    norm_stats = list()
    cauchy_stats = list()

    # Reinterpret any negative numbers in skip_cols to be at the end of the
    # line. Work on a private set so neither the caller's list nor the
    # shared default argument is ever mutated.
    skipped = set(end_col + col if col < 0 else col for col in skip_cols)

    names = [] if column_names is None else column_names

    namecol = 0
    for column in range(0, end_col):
        # Skip the column if instructed to do so:
        if column in skipped:
            continue

        # extract the data column:
        temp = data[:, column]

        if trimends:
            # Boolean mask instead of filter(): filter() returns a lazy
            # iterator on Python 3, which hist()/fit() cannot consume.
            minval = temp.min()
            maxval = temp.max()
            temp = temp[(temp > minval) & (temp < maxval)]

        # plot a histogram of the data ("density" replaces the removed
        # "normed" keyword):
        _, bins, _ = plt.hist(temp, bins=nbins, density=True, label='Binned data')

        # fit a normal distribution (scipy's pdf replaces the removed
        # mlab.normpdf):
        norm_mu, norm_sigma = norm.fit(temp)
        y = norm.pdf(bins, norm_mu, norm_sigma)
        legend_gauss = r'Normal: $\mu=%.3f,\ \sigma=%.3f$' % (norm_mu, norm_sigma)
        plt.plot(bins, y, 'r--', linewidth=2, label=legend_gauss)

        # fit a Lorentz/Cauchy distribution:
        # bug workaround for http://projects.scipy.org/scipy/ticket/1530
        # - specify a starting centroid value for the fit
        cauchy_mu, cauchy_gamma = cauchy.fit(temp, loc=norm_mu)
        y = cauchy.pdf(bins, loc=cauchy_mu, scale=cauchy_gamma)
        legend_cauchy = r'Cauchy: $\mu=%.3f,\ \gamma=%.3f$' % (cauchy_mu, cauchy_gamma)
        plt.plot(bins, y, 'g--', linewidth=2, label=legend_cauchy)

        # now setup the axes labels; names are consumed in order, one per
        # plotted (non-skipped) column:
        if namecol < len(names):
            title = names[namecol]
            namecol += 1
        else:
            title = "Title"

        plt.title(title)
        plt.xlabel("Value")
        plt.ylabel("Frequency")
        plt.legend(loc='best')

        if autosave:
            plt.savefig(save_directory + '/stats_hist_' + title + '.' + save_format, transparent=True, format=save_format)
            plt.close()
        else:
            plt.show()

        # Add in the statistical information.
        norm_stats.append([title, norm_mu, norm_sigma])
        cauchy_stats.append([title, cauchy_mu, cauchy_gamma])

    # Now either print out or save the statistical information
    if not autosave:
        print("Normal Statistics:")

    write_statistics(save_directory + '/stats_normal.txt', norm_stats, autosave)

    if not autosave:
        print("Cauchy Statistics:")

    write_statistics(save_directory + '/stats_cauchy.txt', cauchy_stats, autosave)