def thompsonBernoulli(a, b, banditArms):
    numArms = len(banditArms)
    successCounters = np.zeros(numArms)
    failCounters = np.zeros(numArms)
    thetas = np.zeros(numArms)

    for t in range(5000):
        # draw arms according to beta distribution
        for i in range(numArms):
            thetas[i] = beta(successCounters[i] + a, failCounters[i] + b).rvs()

        # get the arm with max theta
        maxArmInd = np.argmax(thetas)

        # draw the maxArmInd and observe the reward
        reward = banditArms[maxArmInd].rvs()

        if reward == 1:
            successCounters[maxArmInd] += 1
        else:
            failCounters[maxArmInd] += 1

    betaDists = [beta(successCounters[i] + a, failCounters[i] + b) for i in range(numArms)]

    return betaDists
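# A minimal usage sketch, assuming numpy and scipy.stats' beta/bernoulli are
# imported and each arm is a frozen Bernoulli distribution (as the .rvs()
# calls above suggest); the arm probabilities here are illustrative.
import numpy as np
from scipy.stats import beta, bernoulli

arms = [bernoulli(0.3), bernoulli(0.5), bernoulli(0.7)]
posteriors = thompsonBernoulli(a=1, b=1, banditArms=arms)  # uniform Beta(1, 1) prior
for i, post in enumerate(posteriors):
    print("arm %d posterior mean: %.3f" % (i, post.mean()))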
Example #2
def calc():
    a, b = 100, 1
    e1 = beta(a, b).entropy()
    a, b = 100, 2
    e2 = beta(a, b).entropy()
    # if e2 > e1, the entropy increased despite the additional observation
    print e2 - e1, e2 > e1
Example #3
 def test_product_basis(self):
     import time
     comp = (stats.beta(0.5, 0.5), stats.beta(0.5, 0.5))
     rv = best.random.RandomVectorIndependent(comp)
     print str(rv)
     prod = best.gpc.ProductBasis(degree=10, rv=rv)
     print str(prod)
     x = rv.rvs(size=10)
     print prod(x).shape
     x1 = np.linspace(1e-4, 0.99, 64)
     x2 = np.linspace(1e-4, 0.99, 64)
     X1, X2 = np.meshgrid(x1, x2)
     xx = np.vstack([X1.flatten(), X2.flatten()]).T
     z = rv.pdf(xx)
     Z = z.reshape((64, 64))
     #plt.contourf(X1, X2, np.log(Z).T)
     #plt.show()
     start_time = time.time()
     phi = prod(xx)
     end_time = time.time()
     print("Elapsed time was %g seconds" % (end_time - start_time))
     print phi.shape
     for j in range(phi.shape[1]):
         plt.contourf(X1, X2, phi[:, j].reshape((64, 64)))
         plt.show()
Example #4
def make_first_set_of_plots():
    N = 1000
    x = zeros(shape=(N,), dtype=float)
    t = None
    tmax = 10
    axis([0,tmax,0,1])
    for i in range(N):
        t, y = random_walk(0.25, tmax, 0.01, t)
        x[i] = y[-1]
        if (i < 3):
            plot(t, (y+1)/2.0)

    xlabel("time")
    ylabel("CTR")
    savefig("random_walk.png")

    clf()
    subplot(211)
    hist((x+1)/2, bins=50)
    ylabel("Monte carlo results")

    subplot(212)
    best_fit = beta.fit((x+1)/2, floc=0, fscale=1)

    print best_fit
    ctr = arange(0,1,0.001)
    plot(ctr, beta(1,4).pdf(ctr), label="Invariant distribution, beta(1,4)")
    plot(ctr, beta(best_fit[0],best_fit[1]).pdf(ctr), label="Best fit, beta("+str(best_fit[0]) + "," + str(best_fit[1]) + ")")
    xlabel("CTR at t="+str(tmax))
    ylabel("pdf")
    legend()
    savefig("long_term_random_walk_result.png")
Example #5
def sample_hyperparameters(state):
    # http://bit.ly/1baZ3zf
    T = state['T']
    num_samples = 10  # R
    aalpha = 5
    balpha = 0.1
    abeta = 0.1
    bbeta = 0.1
    bgamma = 0.1  # ?
    agamma = 5  # ?

    # for (int r = 0; r < R; r++) {
    for r in range(num_samples):
        # gamma: root level (Escobar+West95) with n = T
        eta = beta(state['gamma'] + 1, T).rvs()
        bloge = bgamma - np.log(eta)
        K = state['num_topics']
        pie = 1. / (1. + (T * bloge / (agamma + K - 1)))
        u = bernoulli(pie).rvs()
        state['gamma'] = gamma(agamma + K - 1 + u, 1. / bloge).rvs()

        # alpha: document level (Teh+06)
        qs = 0.
        qw = 0.

        for m, doc in enumerate(state['docs']):
            qs += bernoulli(len(doc) * 1. / (len(doc) + state['alpha'])).rvs()
            qw += np.log(beta(state['alpha'] + 1, len(doc)).rvs())
        state['alpha'] = gamma(aalpha + T - qs, 1. / (balpha - qw)).rvs()

    state = update_beta(state, abeta, bbeta)
    return state
Example #6
def main():
	dist = stats.beta(10,5)
	target = stats.beta(10,20)
	pvar = 0.01
	steps = 100000
	bins = 10
	savefile = 'states.csv'
	plotfile = 'dists.png'

	prev = np.random.random()

	states = []
	counts = np.zeros(bins)
	counts[rounded(prev,bins)] += 1
	for i in xrange(steps):
		cur = stats.norm(prev,pvar).rvs()
		counts[rounded(cur,bins)] += 1
		counts /= np.sum(counts)
		curlik = dist.pdf(cur)*target.pdf(cur)
		a = dist.pdf(cur)/dist.pdf(prev)
		if a > np.random.random(): prev = cur
		states.append(prev)
	np.savetxt(savefile, states)

	xr = np.linspace(0,1,1000)
	plt.plot(xr,dist.pdf(xr))
	plt.hist(states, alpha=.5, normed=1)
	plt.savefig(plotfile)
def f3TruncNormRVSnp(parameters):
    N = parameters['N']
    target = parameters['target']
    rv1, rv2, rv3 = ndarray(shape = (N,), dtype=float), ndarray(shape = (N,), dtype=float), ndarray(shape = (N,), dtype=float)

    # if parameters['ncpu']:
    #     ncpu = parameters['ncpu']
    # else:
    #     ncpu = mp.cpu_count()
    #
    # pool = mp.Pool(ncpu)
    # workers = []
    if not parameters['distribution']:
        print 'No distribution set...abort'
        exit(1)
    elif parameters['distribution'] == 'truncnorm':
        a1, b1 = (parameters['min_intrv1'] - parameters['mu1']) / parameters['sigma1'], (parameters['max_intrv1'] - parameters['mu1']) / parameters['sigma1']
        a2, b2 = (parameters['min_intrv2'] - parameters['mu2']) / parameters['sigma2'], (parameters['max_intrv2'] - parameters['mu2']) / parameters['sigma2']
        a3, b3 = (parameters['min_intrv3'] - parameters['mu3']) / parameters['sigma3'], (parameters['max_intrv3'] - parameters['mu3']) / parameters['sigma3']
        rv1 = truncnorm(a1, b1, loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = truncnorm(a2, b2, loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = truncnorm(a3, b3, loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'norm':
        rv1 = norm(loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = norm(loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = norm(loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'uniform':
        rv1 = uniform(loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = uniform(loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = uniform(loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'beta':
        rv1 = beta(a=parameters['min_intrv1'], b=parameters['max_intrv1'], loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = beta(a=parameters['min_intrv2'], b=parameters['max_intrv2'], loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = beta(a=parameters['min_intrv3'], b=parameters['max_intrv3'], loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'triang':
        rv1 = triang(loc=parameters['min_intrv1'], scale=parameters['max_intrv1'], c=parameters['mu1']).rvs(N)
        rv2 = triang(loc=parameters['min_intrv2'], scale=parameters['max_intrv2'], c=parameters['mu2']).rvs(N)
        rv3 = triang(loc=parameters['min_intrv3'], scale=parameters['max_intrv3'], c=parameters['mu3']).rvs(N)
    else:
        print 'Distribution not recognized...abort'
        exit(1)

    if parameters['scaling']:
        #scale the values of Qs in the allowed range such that sum(Q_i) = A
        r = ABS(parameters['Q1']) + ABS(parameters['Q2']) + ABS(parameters['Q3'])
        if r == 0.0:
            r = 1.

        # rounding the values, the sum could exceed A
        Q1 = ABS(parameters['Q1']) * parameters['A'] / r
        Q2 = ABS(parameters['Q2']) * parameters['A'] / r
        Q3 = parameters['A'] - Q1 - Q2
    else:
        # print "scaling = False"
        Q1 = parameters['Q1']
        Q2 = parameters['Q2']
        Q3 = parameters['Q3']

    return _f3(rv1, rv2, rv3, Q1, Q2, Q3, target)
Example #8
def greedy_allocation3(parameters):
    """
    Greedy heuristic for 3 suppliers (the same as heu_allocation3 but with different parameters).
    Does not write to the file; it returns the solution instead.
    :param parameters: parameters dict
    :return: dict with the allocation Q1, Q2, Q3 and the resulting probability
    """
    if not parameters['distribution']:
        print 'No distribution set...abort'
        exit(1)
    elif parameters['distribution'] == 'truncnorm':
        rv1 = truncnorm_custom(parameters['min_intrv1'], parameters['max_intrv1'], parameters['mu1'], parameters['sigma1'])
        rv2 = truncnorm_custom(parameters['min_intrv2'], parameters['max_intrv2'], parameters['mu2'], parameters['sigma2'])
        rv3 = truncnorm_custom(parameters['min_intrv3'], parameters['max_intrv3'], parameters['mu3'], parameters['sigma3'])
    elif parameters['distribution'] == 'norm':
        rv1 = norm(parameters['mu1'], parameters['sigma1'])
        rv2 = norm(parameters['mu2'], parameters['sigma2'])
        rv3 = norm(parameters['mu3'], parameters['sigma3'])
    elif parameters['distribution'] == 'uniform':
        rv1 = uniform(loc=parameters['mu1'], scale=parameters['sigma1'])
        rv2 = uniform(loc=parameters['mu2'], scale=parameters['sigma2'])
        rv3 = uniform(loc=parameters['mu3'], scale=parameters['sigma3'])
    elif parameters['distribution'] == 'beta':
        rv1 = beta(a=parameters['min_intrv1'], b=parameters['max_intrv1'], loc=parameters['mu1'], scale=parameters['sigma1'])
        rv2 = beta(a=parameters['min_intrv2'], b=parameters['max_intrv2'], loc=parameters['mu2'], scale=parameters['sigma2'])
        rv3 = beta(a=parameters['min_intrv3'], b=parameters['max_intrv3'], loc=parameters['mu3'], scale=parameters['sigma3'])
    elif parameters['distribution'] == 'triang':
        rv1 = triang(loc=parameters['min_intrv1'], scale=parameters['max_intrv1'], c=parameters['mu1'])
        rv2 = triang(loc=parameters['min_intrv2'], scale=parameters['max_intrv2'], c=parameters['mu2'])
        rv3 = triang(loc=parameters['min_intrv3'], scale=parameters['max_intrv3'], c=parameters['mu3'])
    else:
        print 'Distribution not recognized...abort'
        exit(1)

    A = parameters['A']
    Q = {i: 0 for i in xrange(3)}

    while A > 0:
        best_probability = -1
        best_retailer = -1
        for n, r in enumerate([rv1, rv2, rv3]):
            p = 1 - r.cdf(Q[n]+1)
            if p > best_probability:
                best_probability = p
                best_retailer = n

        Q[best_retailer] += 1
        A -= 1

    parameters['Q1'] = Q[0]
    parameters['Q2'] = Q[1]
    parameters['Q3'] = Q[2]

    return {'Q1': Q[0],
            'Q2': Q[1],
            'Q3': Q[2],
            'PROB': f3TruncNormRVSnp(parameters)}
Example #9
 def sample_sticks(self):
     for node in self.tssb.dfs():
         node.nu = stats.beta(node.point_count + 1, node.path_count + node.alpha).rvs()
         children = sorted(list(node.children.keys()))[::-1]
         count = 0
         for i in children:
             child = node.children[i]
             node.psi[i] = stats.beta(child.path_count + 1, count + node.gamma).rvs()
             count += child.path_count
Example #10
    def estimate(self, time):
        if len(self.estimates) == 0:
            return stats.beta(1, 1)
        else:
            latest, alpha, beta = self.estimates[-1]
            trust = self.trust(time - latest)
            alpha = 1 + trust * (alpha - 1)
            beta = 1 + trust * (beta - 1)

            return stats.beta(alpha, beta)
Example #11
def generate_samples():
    rv1 = beta(1.0 / 3, 1.0)
    rv2 = beta(0.5, 0.5)

    sample1 = rv1.rvs(size=N)
    np.save('rv1_sample', sample1)
    np.save('rv1_pdf', rv1.pdf(sample1))

    sample2 = rv2.rvs(size=N)
    np.save('rv2_sample', sample2)
    np.save('rv2_pdf', rv2.pdf(sample2))
Example #12
def test_jacobi_consistency():
    import scipy.stats as stats
    dist = stats.beta(2, 3, loc= -1, scale=2)
    p = JacobiPolynomials(alpha=2, beta=1, a= -1, b=1, normalised=False)
    assert_false(p.normalised)
    _check_poly_consistency(p, dist)

    dist = stats.beta(2, 1.5, loc= -2, scale=5)
    p = JacobiPolynomials(alpha=0.5, beta=1, a= -2, b=3)
    assert_true(p.normalised)
    _check_poly_consistency(p, dist)
def main():
    results = []
    plot(results, save=True)
    for n in range(500):
        result = np.random.binomial(1, p=0.25)
        results.append(result)
        a , b = sum(results) , len(results) - sum(results)
        mean, std, entropy = beta(a + 1 , b + 1).mean(), beta(a + 1 , b + 1).std(), beta(a + 1 , b + 1).entropy()
        # print "face:" + str(a) + "," + "tail:" + str(b)
        # print "mean:%1.3f,std:%1.3f,entropy:%1.3f" % (beta(a + 1, b + 1).mean(), beta(a + 1, b + 1).std(), beta(a + 1, b + 1).entropy())
        if len(results) % 10 == 0:
            plot(results, save=True)
Example #14
def dirichlet_sample_approximation(base_measure, alpha, tol=0.01):
    betas = []
    pis = []
    betas.append(beta(1, alpha).rvs())  # sample from Beta(1, alpha)
    pis.append(betas[0])
    while sum(pis) < (1.-tol):         # in the limit, the stick-breaking weights sum to 1
        s = np.sum([np.log(1 - b) for b in betas])
        new_beta = beta(1, alpha).rvs()
        betas.append(new_beta)
        pis.append(new_beta * np.exp(s))
    pis = np.array(pis)
    thetas = np.array([base_measure() for _ in pis])
    return pis, thetas
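# A minimal usage sketch, assuming numpy and scipy.stats' beta are imported
# as in the snippet; the standard-normal base measure is only illustrative.
import numpy as np
from scipy.stats import beta

base_measure = lambda: np.random.randn()  # illustrative base measure G0
pis, thetas = dirichlet_sample_approximation(base_measure, alpha=5.0, tol=0.01)
print(pis.sum())  # the truncated stick-breaking weights sum to at least 1 - tol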
Example #15
def modality_models():
    parameter = 20.
    rv_included = stats.beta(parameter, 1)
    rv_excluded = stats.beta(1, parameter)
    rv_middle = stats.beta(parameter, parameter)
    rv_uniform = stats.uniform(0, 1)
    rv_bimodal = stats.beta(1. / parameter, 1. / parameter)

    models = {'included': rv_included,
              'excluded': rv_excluded,
              'middle': rv_middle,
              'uniform': rv_uniform,
              'bimodal': rv_bimodal}
    return models
Example #16
def modality_models():
    parameter = 20.
    rv_psi1 = stats.beta(parameter, 1)
    rv_psi0 = stats.beta(1, parameter)
    rv_middle = stats.beta(parameter, parameter)
    rv_ambiguous = stats.uniform(0, 1)
    rv_bimodal = stats.beta(1. / parameter, 1. / parameter)

    models = {'Psi~1': rv_psi1,
              'Psi~0': rv_psi0,
              'middle': rv_middle,
              'ambiguous': rv_ambiguous,
              'bimodal': rv_bimodal}
    return models
Example #17
def plot_betas():
    xs=linspace(0, 1, 30)
    plt.plot(xs, [beta(4, 2).pdf(x) for x in xs], 'bs', xs, [beta(2, 2).pdf(x) for x in xs], 'g^' )
    font = {'family' : 'serif',
        'color'  : 'darkred',
        'weight' : 'normal',
        'size'   : 18,
        }
    plt.title('The Beta Distribution', fontdict=font)
    plt.text(0.2, 1.5, r'$\alpha=\beta=2$', fontdict=font)
    plt.text(0.45, 2, r'$\alpha=4$,$\beta=2$', fontdict=font)
    plt.xlabel('causal-strength', fontdict=font)
    plt.ylabel('Density', fontdict=font)

    plt.show()
Example #18
def hpd_beta(y, n, h=.1, a=1, b=1, plot=False, **plot_kwds):
    apost = y + a
    bpost = n - y + b
    if apost > 1 and bpost > 1:
        mode = (apost - 1)/(apost + bpost - 2)
    else:
        raise Exception("mode at 0 or 1: HPD not implemented yet")

    post = stats.beta(apost, bpost)

    dmode = post.pdf(mode)

    lt = opt.bisect(lambda x: post.pdf(x) / dmode - h, 0, mode)
    ut = opt.bisect(lambda x: post.pdf(x) / dmode - h, mode, 1)

    coverage = post.cdf(ut) - post.cdf(lt)
    if plot:
        plt.figure()
        plotf(post.pdf)
        plt.axhline(h*dmode)
        plt.plot([ut, ut], [0, post.pdf(ut)])
        plt.plot([lt, lt], [0, post.pdf(lt)])
        plt.title(r'$p(%s < \theta < %s | y)$' %
                  tuple(np.around([lt, ut], 2)))

    return lt, ut, coverage, h
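# A small usage sketch for hpd_beta, assuming the scipy imports used by the
# snippet; the counts y and n below are illustrative.
import scipy.stats as stats
import scipy.optimize as opt

lt, ut, coverage, h = hpd_beta(y=7, n=20, h=0.1)
print("interval at density threshold h=%.2f: (%.3f, %.3f), coverage %.3f"
      % (h, lt, ut, coverage))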
Example #19
def main():
	dist = stats.beta(10,5)
	steps = 100000
	size = 100
	alpha = 0.05
	changes = int(alpha*size)
	savefile = 'states.csv'
	plotfile = 'dists.png'

#	distrvs = dist.rvs(size)
	current = np.random.random(size)
	cur_ks = ks(current, dist.cdf)[1]

	states = np.zeros((steps,size))

	for i in xrange(steps):
		prop = np.copy(current)
		prop[np.random.choice(range(size),changes)] = np.random.random(changes)
		prop_ks = ks(prop,dist.cdf)[1]
		diff = prop_ks-cur_ks
		if diff>0:
			current = prop
			cur_ks = prop_ks
		states[i] = current
		print cur_ks

	np.savetxt(savefile, states)
Example #20
    def __init__(self, alpha, beta):
        self.alpha = alpha
        self.beta = beta

        # set dist before calling super's __init__
        self.dist = st.beta(alpha, beta)
        super(Beta, self).__init__()
Example #21
def pickB(c,d,var):
  a=d*d/8./var-0.5
  beta=dists.beta(a,a,loc=c,scale=d)
  yB=[]
  for x in xs:
    yB.append(beta.pdf(x))
  plt.plot(xs,yB,label='c='+str(c))
Example #22
    def test_init(self, alphas, betas):
        from flotilla.compute.splicing import ModalityModel

        model = ModalityModel(alphas, betas)

        true_alphas = alphas
        true_betas = betas
        if not isinstance(alphas, Iterable) and not isinstance(betas,
                                                               Iterable):
            true_alphas = [alphas]
            true_betas = [betas]

        true_alphas = np.array(true_alphas) \
            if isinstance(true_alphas, Iterable) else np.ones(
            len(true_betas)) * true_alphas
        true_betas = np.array(true_betas) \
            if isinstance(true_betas, Iterable) else np.ones(
            len(true_alphas)) * true_betas

        true_rvs = [stats.beta(a, b) for a, b in
                    zip(true_alphas, true_betas)]
        true_scores = np.ones(true_alphas.shape).astype(float)
        true_scores = true_scores / true_scores.max()
        true_prob_parameters = true_scores / true_scores.sum()

        npt.assert_array_equal(model.alphas, true_alphas)
        npt.assert_array_equal(model.betas, true_betas)
        npt.assert_array_equal(model.scores, true_scores)
        npt.assert_array_equal(model.prob_parameters, true_prob_parameters)
        for test_rv, true_rv in zip(model.rvs, true_rvs):
            npt.assert_array_equal(test_rv.args, true_rv.args)
Example #23
def plot_beta_dist( ctr, trials, success, alphas, betas, turns ):
	"""
	Pass in the ctr, trials and success, alphas, betas returned
	by the `experiment` function and the number of turns 
	and plot the beta distribution for all the arms in that turn
	"""
	subplot_num = len(turns) / 2
	x = np.linspace( 0.001, .999, 200 )
	fig = plt.figure( figsize = ( 14, 7 ) ) 

	for idx, turn in enumerate(turns):

		plt.subplot( subplot_num, 2, idx + 1 )

		for i in range( len(ctr) ):
			y = beta( alphas[i] + success[ turn, i ], 
					  betas[i] + trials[ turn, i ] - success[ turn, i ] ).pdf(x)
			line = plt.plot( x, y, lw = 2, label = "arm {}".format( i + 1 ) )
			color = line[0].get_color()
			plt.fill_between( x, 0, y, alpha = 0.2, color = color )
			plt.axvline( x = ctr[i], color = color, linestyle = "--", lw = 2 )
			plt.title("Posteriors After {} turns".format(turn) )
			plt.legend( loc = "upper right" )

	return fig
Example #24
    def setNewEvidence(self, pos, tot):

        a = np.sum(pos)
        b = np.sum(tot) - a
        a_new = self.a + a
        b_new = self.b + b

        # get new PDF
        self.rescale((a_new + b_new) * 1.0)  # some multiplicative factor

        y_new = np.zeros(shape=(len(self.y),), dtype=np.float)
        ## use normal approximation for large a and b, unfortunately we reach large a and b very quickly
        if a_new + b_new > 1000:
            y_new = self.normalApprox(a_new, b_new)
        else:
            self.rv = beta(a_new, b_new)
            y_new = self.rv.pdf(self.x)
        ## just in case something messes up
        if any(np.isnan(y_new)):
            y_new = self.normalApprox(a_new, b_new)

        # measure dKL and dJS before update
        self.measureDKL(y_new)
        self.measureDJS(y_new)

        # update
        self.a = a_new
        self.b = b_new
        self.y = y_new
Example #25
    def check_initializer_statistics(self, xp, n):
        from scipy import stats

        ws = xp.empty((n,) + self.shape, dtype=self.dtype)
        for i in range(n):
            initializer = self.target(**self.target_kwargs)
            initializer(xp.squeeze(ws[i:i+1], axis=0))

        expected_scale = self.scale or 1.1
        sampless = cuda.to_cpu(ws.reshape(n, -1).T)
        alpha = 0.01 / len(sampless)

        ab = 0.5 * (self.dim_in - 1)

        for samples in sampless:
            if self.dim_in == 1:
                numpy.testing.assert_allclose(abs(samples), expected_scale)
                _, p = stats.chisquare((numpy.sign(samples) + 1) // 2)
            else:
                _, p = stats.kstest(
                    samples,
                    stats.beta(
                        ab, ab,
                        loc=-expected_scale,
                        scale=2*expected_scale
                    ).cdf
                )
            assert p >= alpha
Example #26
 def test_random_vector(self):
     comp = (stats.expon(), stats.beta(0.4, 0.8), stats.norm())
     rv = best.random.RandomVectorIndependent(comp)
     print str(rv)
     x = rv.rvs()
     print 'One sample: ', x
     print 'pdf:', rv.pdf(x)
     x = rv.rvs(size=10)
     print '10 samples: ', x
     print 'pdf: ', rv.pdf(x)
     print rv.mean()
     print rv.var()
     print rv.std()
     print rv.stats()
     # Split it in two:
     rv1, rv2 = rv.split(0)
     print str(rv1)
     x = rv1.rvs(size=5)
     print x
     print rv1.pdf(x)
     print rv2.pdf(x)
     print str(rv2)
     print x
     x = rv2.rvs(size=5)
     print rv2.pdf(x)
     rv3, rv4 = rv1.split(0)
     print str(rv3)
     print str(rv4)
     rv5, rv6 = rv3.split(1)
     print str(rv5)
     print str(rv6)
     rv7, rv8 = rv5.split(2)
     print str(rv7)
     print str(rv8)
Example #27
    def _prior_scipy(self):
        """Return the scipy prior. For Binomial inference this the same as the
        marginal because there is a single model parameter."""
        a = self._prior_hyperparameters['alpha']
        b = self._prior_hyperparameters['beta']

        return beta(a, b)
Example #28
    def __getBetaDistribution(self, c):
        # left border
        a = c - self._e3 / 2.
        # width of beta distribution
        b = self._e3

        return beta(self._p, self._q, a, b)
Example #29
def test_slice_theta_irm():
    N = 10
    defn = model_definition([N], [((0, 0), bbnc)])
    data = np.random.random(size=(N, N)) < 0.8
    view = numpy_dataview(data)
    r = rng()
    prior = {'alpha': 1.0, 'beta': 9.0}

    s = initialize(
        defn,
        [view],
        r=r,
        cluster_hps=[{'alpha': 2.0}],
        relation_hps=[prior],
        domain_assignments=[[0] * N])

    bs = bind(s, 0, [view])

    params = {0: {'p': 0.05}}

    heads = len([1 for y in data.flatten() if y])
    tails = len([1 for y in data.flatten() if not y])

    alpha1 = prior['alpha'] + heads
    beta1 = prior['beta'] + tails

    def sample_fn():
        theta(bs, r, tparams=params)
        return s.get_suffstats(0, [0, 0])['p']

    rv = beta(alpha1, beta1)
    assert_1d_cont_dist_approx_sps(sample_fn, rv, nsamples=50000)
Example #30
def beta_cdf(x, mu, sig, a, b):
    s = (mu - a) / (b - a)
    e = (b - a) / sig
    q = s * s * e * e
    alpha = q * (1 - s) - s
    beta_ = q * (s - 2) + s * (1 + e * e) - 1
    return beta(alpha, beta_, loc = a, scale = (b - a)).cdf(x)
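# A usage sketch, assuming scipy.stats' beta is imported as in the snippet;
# the mean, standard deviation, and bounds below are illustrative.
from scipy.stats import beta

# CDF at x = 0.5 of a beta on [0, 1] moment-matched to mean 0.4 and std 0.1
print(beta_cdf(0.5, mu=0.4, sig=0.1, a=0.0, b=1.0))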
Example #31
 def expectation(self):
     return scs.beta(self.successes, self.failures).rvs(1)
Example #32
"""
Beta distribution
"""
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

# these are the alpha and beta parameter values
params = [0.5, 1, 2, 3]
x = np.linspace(0, 1, 100)
f, ax = plt.subplots(len(params), len(params), sharex=True, sharey=True)
for i in range(4):
    for j in range(4):
        a = params[i]
        b = params[j]
        # pdf of the beta distribution
        y = stats.beta(a, b).pdf(x)
        ax[i, j].plot(x, y)
        ax[i, j].plot(0,
                      0,
                      label="$\\alpha$={:3.2f}\n$\\beta$={:3.2f}".format(a, b),
                      alpha=0)
        ax[i, j].legend(fontsize=8)
ax[3, 0].set_xlabel('$\\theta$', fontsize=16)
ax[0, 0].set_ylabel('$p(\\theta)$', fontsize=16)
# save the figure as an image
# plt.savefig('bata.png', dpi=300, figsize=(5.5, 5.5))
plt.show()
Example #33
mean_class_2 = X_class_2.mean()

Prior_prob = [1 / 3, 1 / 3, 1 / 3]
y_pred = classify_using_bayes(X_test)
test_accuracy = np.mean(y_pred == y_test)
print(test_accuracy * 100)

# The accuracy of the ML estimate stays at 100% as long as the training and test sets are each 50% of the data. Once the test set grows larger than the training set, accuracy starts to decrease from 100%.

# Probability Distributions

a = 0.1
b = 0.1
X = np.linspace(0 + 1e-5, 1 - 1e-5,
                10000)  # beta support is [0, 1]; stay strictly inside it since the pdf diverges at 0 and 1 when a, b < 1
rv = beta(a, b)
plt.plot(X, rv.pdf(X))
print("Mean is ", np.mean(rv.pdf(X)))
print("Variance is ", np.var(rv.pdf(X)))

a = 1
b = 1
X = np.linspace(0, 1, 10000)
rv = beta(a, b)
plt.plot(X, rv.pdf(X))
print("Mean is ", np.mean(rv.pdf(X)))
print("Variance is ", np.var(rv.pdf(X)))

a = 2
b = 3
X = np.linspace(0, 1, 10000)
Example #34
 def test_logpdf_ticket_1866(self):
     alpha, beta = 267, 1472
     x = np.array([0.2, 0.5, 0.6])
     b = stats.beta(alpha, beta)
     assert_allclose(b.logpdf(x).sum(), -1201.699061824062)
     assert_allclose(b.pdf(x), np.exp(b.logpdf(x)))
def model_selection_cv(pca_trn, pca_tst, y_train, y_test):
    #warnings.filterwarnings("ignore")

    # Load workspace variable from saved file
    #groupby_mean = pd.read_csv('ml-latest/base.csv')

    # -------------------------matrix of the numerical features-------------------------#
    ##import matplotlib.pyplot as plt
    ##cax = plt.matshow(np.cov(D_Arr[:,:-1].T))
    ##cax = plt.matshow(np.cov(D_Arr[:,0:15].T))
    ##plt.clim(-1,1)
    ##plt.colorbar(cax)
    ##plt.title('Covariance matrix of numerical features')
    ##plt.show()

    #rating = groupby_mean.rating
    #groupby_mean.drop(['rating'], axis=1, inplace=True)

    # Split the dataset in the ratio train:test = 0.9:0.1
    #X_train, X_test, y_train, y_test = model_selection.train_test_split(groupby_mean, rating, test_size=0.1,
    #                                                                    random_state=0)

    X_train, X_test = pca_trn, pca_tst

    # # Create OLS linear regression object
    # regrOLS = linear_model.LinearRegression()
    #
    # # Perform 5 fold cross-validation and store the MSE resulted from each fold
    # scores = model_selection.cross_val_score(regrOLS, X_train, y_train, scoring='r2', cv=5)
    #
    # # Note: Due to a known issue in scikit-learn the results return are flipped in sign
    # print('OLS: Least CV error: %.2f\n' % np.min(-scores))
    #
    # # ---------------- Cross validation for Ridge and Lasso ------------------------#
    # # Range of hyper-parameters to choose for CV
    # lambdas = [0.0001, 0.001, 0.01, 0.02, 0.05, 0.1, 1, 10]
    # for l in lambdas:
    #     print('Lambda = %.5f' % l)
    #     # Start time for the 5-fold CV
    #     start = time.time()
    #     # Create ridge regression object
    #     knn_reg = linear_model.Ridge(alpha=l)
    #     scores = model_selection.cross_val_score(knn_reg, X_train, y_train, scoring='r2', cv=5)
    #     end = time.time()
    #     t = end - start
    #     print('Ridge: Least CV error: %.2f and time : %.3f' % (np.min(-scores), t))
    #     start = time.time()
    #     # Create lasso object
    #     regrLasso = linear_model.Lasso(alpha=l)
    #     scores = model_selection.cross_val_score(regrLasso, X_train, y_train, scoring='r2', cv=5)
    #     # Measure and compute time for the 5-fold CV
    #     end = time.time()
    #     t = end - start
    #     print('Lasso: Least CV error: %.2f and time : %.3f' % (np.min(-scores), t))
    #     print('\n')
    #
    # # -------------------- Cross validation for Elastic Net ------------------------#
    # # Range of hyper-parameters to choose for CV
    # l1Ratios = [0.1, 0.25, 0.5, 0.75, 0.9]
    # for l in lambdas:
    #     print('Lambda = %.5f' % l)
    #     for l1R in l1Ratios:
    #         start = time.time()
    #         # Create elastic net object
    #         regrElasNet = linear_model.ElasticNet(alpha=l, l1_ratio=l1R)
    #         scores = model_selection.cross_val_score(regrElasNet, X_train, y_train, scoring='r2',
    #                                                  cv=5)
    #         end = time.time()
    #         t = end - start
    #         print('Elastic Net: l1Ratio = %.2f, Least CV error: %.2f and time : %.3f' % (l1R, np.min(-scores), t))
    #     print('\n')

    # ------------- Cross validation for Random Forest Regressor -------------------#
    # Range of hyper-parameters to choose for CV
    n_estimator = [1, 2, 5, 10, 20, 35, 50, 100, 200]
    maxFeatures = [0.25, 0.5, 0.75, 1]
    maxDepth = [3, 6, 8, 10, 15, 25]
    for n in n_estimator:
        for mf in maxFeatures:
            for d in maxDepth:
                print('Number of trees/estimators = %d, max depth = %d' %
                      (n, d))
                start = time.time()
                # Create Random Forest Regressor object
                randFor = RandomForestRegressor(max_depth=d,
                                                random_state=0,
                                                n_estimators=n,
                                                max_features=mf)
                scores = model_selection.cross_val_score(randFor,
                                                         X_train,
                                                         y_train,
                                                         scoring='r2',
                                                         cv=5)
                end = time.time()
                t = end - start
                print(
                    'Random forest regressor: %% of features = %.2f, Least CV error: %.2f and time : %.3f'
                    % (100 * mf, np.min(-scores), t))
            print('\n')

    # ------------- Cross validation for regressor using AdaBoost  -----------------#
    # Range of hyper-parameters to choose for CV
    n_estimator = [1, 2, 5, 10, 20, 35, 50, 100, 200]
    learning_rate = ['linear', 'square', 'exponential']
    maxDepth = [3, 6, 8, 10, 15, 25]
    for l in learning_rate:
        for n in n_estimator:
            for d in maxDepth:
                print('Number of trees/estimators = %d, max depth = %d' %
                      (n, d))
                start = time.time()
                # Create Boosting Regressor object
                boosting = AdaBoostRegressor(
                    DecisionTreeRegressor(max_depth=d),
                    random_state=0,
                    n_estimators=n,
                    loss=l)
                scores = model_selection.cross_val_score(boosting,
                                                         X_train,
                                                         y_train,
                                                         scoring='r2',
                                                         cv=5,
                                                         n_jobs=1)
                end = time.time()
                t = end - start
                print(
                    'Regressor using AdaBoosting: loss type = %s, Least CV error: %.2f and time : %.3f'
                    % (l, np.min(-scores), t))
            print('\n')

    # ---------------- Cross validation for KNeighborsRegressor------------------------#
    # Range of hyper-parameters to choose for CV
    lambdas = [2, 3, 4, 5, 6, 7, 8, 9]
    for l in lambdas:
        print('Lambda = %.5f' % l)
        # Start time for the 5-fold CV
        start = time.time()
        # Create KNeighborsRegressor object
        knn_reg = KNeighborsRegressor(n_neighbors=l)
        scores = model_selection.cross_val_score(knn_reg,
                                                 X_train,
                                                 y_train,
                                                 scoring='r2',
                                                 cv=5)
        end = time.time()
        t = end - start
        print('n_neighbors: ', l)
        print('KNeighborsRegressor: Least CV error: %.2f and time : %.3f' %
              (np.min(-scores), t))
        print('\n')

    # ------------- Cross validation for regressor using XGBoost  -----------------#
    # Range of hyper-parameters to choose for CV
    # n_estimator = [100, 1000, 10000]
    # learning_rate = [0.02, 0.05, 0.07, 0.1, 0.2, 0.5, 0.7, 1]
    # maxDepth = [3, 6, 8, 10, 15, 25]
    # gamma = [0,0.03,0.1,0.3]
    # colsample_bytree = [0.4,0.6,0.8]
    # reg_alpha = [1e-5, 1e-2,  0.75]
    # reg_lambda = [1e-5, 1e-2, 0.45]
    # subsample = [0.6,0.95]
    # min_child_weight = [1.5,6,10]
    #
    # for l in learning_rate:
    #     for n in n_estimator:
    #         for d in maxDepth:
    #             for g in gamma:
    #                 for c in colsample_bytree:
    #                     for alp in reg_alpha:
    #                         for lam in reg_lambda:
    #                             for s in subsample:
    #                                 for min_child in min_child_weight:
    #                                     print('Number of trees/estimators = %d, max depth = %d' % (n, d))
    #                                     start = time.time()
    #                                     # Create Boosting Regressor object
    #                                     xgb_model = xgboost.XGBRegressor(colsample_bytree=c,
    #                                                          gamma=g,
    #                                                          learning_rate=l,
    #                                                          max_depth=d,
    #                                                          min_child_weight=min_child,
    #                                                          n_estimators=n,
    #                                                          reg_alpha=alp,
    #                                                          reg_lambda=lam,
    #                                                          subsample=s,
    #                                                          seed=42)
    #                                     scores = model_selection.cross_val_score(xgb_model, X_train, y_train, scoring='r2',
    #                                                                              cv=5, n_jobs=1)
    #                                     end = time.time()
    #                                     t = end - start
    #                                     print("Regressor using XGboosting: learning rate = %s, Least CV error: %.2f, "
    #                                           "gamma =.3f, min_child_weight =.4f, n_estimators = .5f, reg_alpha = "
    #                                           ".6f, reg_lambda = .7f, subsample= .8f, max_depth = %.9f  and time : %.10f"
    #                                           % (l, np.min(-scores), g, min_child, n, alp, lam, s, d, t))
    one_to_left = st.beta(10, 1)
    from_zero_positive = st.expon(0, 50)

    params = {
        "n_estimators": st.randint(100, 1000, 10000),
        "max_depth": st.randint(3, 40),
        "learning_rate": st.uniform(0.05, 0.4),
        "colsample_bytree": one_to_left,
        "subsample": one_to_left,
        "gamma": st.uniform(0, 10),
        'reg_alpha': from_zero_positive,
        "min_child_weight": from_zero_positive,
    }

    xgbreg = XGBRegressor(nthreads=-1)

    gs = RandomizedSearchCV(xgbreg, params, n_jobs=1)
    gs.fit(X_train, y_train)

    print("Regressor using XGboosting: ", "\nBest Index: ", gs.best_index_,
          "\nBest estimator: ", gs.best_estimator_, "\nBest Params: ",
          gs.best_params_)
    print('\n')
Example #36
    pce_values = pce(validation_samples)
    error = np.linalg.norm(pce_values - validation_values, axis=0)
    if not relative:
        error /= np.sqrt(validation_samples.shape[1])
    else:
        error /= np.linalg.norm(validation_values, axis=0)

    return error


np.random.seed(1)

#%%
# Our goal is to demonstrate how to use a polynomial chaos expansion (PCE) to approximate a function :math:`f(z): \reals^d \rightarrow \reals` parameterized by the random variables :math:`z=(z_1,\ldots,z_d)` with the joint probability density function :math:`\pdf(\V{\rv})`. In the following we use a function common in the literature, the oscillatory Genz function. It is well suited for testing because the number of variables and the degree of non-linearity can be adjusted. We define the random variables and the function with the following code.

univariate_variables = [uniform(), beta(3, 3)]
variable = pya.IndependentMultivariateRandomVariable(univariate_variables)

c = np.array([10, 0.01])
model = GenzFunction("oscillatory",
                     variable.num_vars(),
                     c=c,
                     w=np.zeros_like(c))

#%%
# Here we have intentionally set the coefficients :math:`c` of the Genz function to be highly anisotropic, to emphasize the properties of the adaptive algorithm.
#
# PCE represent the model output :math:`f(\V{\rv})` as an expansion in orthonormal polynomials,
#
# .. math::
#
Example #37
    def __init__(self, low=None, peak=None, high=None, gamma=4.0):

        self.a = low
        self.b = peak
        self.c = high
        self.g = gamma
        if self.a is None or self.b is None or self.c is None:
            raise ValueError('Parameters low, peak and high must be specified')
        self.range = (self.c - self.a)
        if self.g <= 0:
            raise ValueError(
                'g parameter should be greater than 0. By default is 4.0')

        self.mean = round((self.a + (self.g * self.b) + self.c) / (self.g + 2),
                          4)

        if self.mean == self.b:
            self.alpha = self.beta = 3.0
        else:
            self.alpha = round(
                ((self.mean - self.a) * (2 * self.b - self.a - self.c)) /
                ((self.b - self.mean) * (self.c - self.a)), 4)
            self.beta = round(
                self.alpha * (self.c - self.mean) / (self.mean - self.a), 4)

        self.dist = ss.beta(self.alpha,
                            self.beta,
                            loc=self.a,
                            scale=self.range)
        self.parameters = np.array([self.alpha, self.beta])
        self.median = round(
            (self.a + ((2 + self.g) * self.b) + self.c) / (4 + self.g), 4)
        self.mode = round(self.b, 4)
        self.variance = round(
            ((self.mean - self.a) * (self.c - self.mean)) / (self.g + 4), 4)
        self.skewness = round(
            (2 *
             (self.beta - self.alpha) * np.sqrt(self.alpha + self.beta + 1)) /
            ((self.alpha + self.beta + 2) * np.sqrt(self.alpha * self.beta)),
            4)

        self.kurt = ((self.g + 2) * (
            (((self.alpha - self.beta)**2) * (self.alpha + self.beta + 1)) +
            (self.alpha * self.beta *
             (self.alpha + self.beta + 2)))) / (self.alpha * self.beta *
                                                (self.alpha + self.beta + 2) *
                                                (self.alpha + self.beta + 4))

        self.excess_kurtosis = round(
            6 * ((self.alpha - self.beta)**2 * (self.alpha + self.beta + 1) -
                 (self.alpha * self.beta * (self.alpha + self.beta + 2))) /
            (self.alpha * self.beta * (self.alpha + self.beta + 2) *
             (self.alpha + self.beta + 4)) + 4, 4)

        self.param_title = str('low=' + str(self.a) + ', peak=' + str(self.b) +
                               ', high=' + str(self.c) + ', Gamma=' +
                               str(self.g))
        self.param_title_long = str('Beta Pert (low=' + str(self.a) +
                                    ', peak=' + str(self.b) + ', high=' +
                                    str(self.c) + ', Gamma=' + str(self.g) +
                                    ')')
                  sampler_lr=1e-2,
                  prior_scale=10,
                  adversary_weight=0.0,
                  num_sample_mc_steps=1000,
                  sampler_beta_min=0.02,
                  sampler_beta_target=10,
                  max_replay=1)
max_resources = 30
if do_search == "halving":
    distributions = dict(
        lr=expon(1e-2),
        sampler_lr=expon(1e-1),
        sampler=["mala", "langevin", "tempered mala", "tempered langevin"],
        weight_decay=expon(1e-3),
        #     max_iter=poisson(30),
        replay_prob=beta(a=9, b=1),
        adversary_weight=beta(a=1, b=1),
        num_units=poisson(32),
        num_layers=poisson(3),
        max_replay=poisson(10),
    )
    clf_cv = HalvingRandomSearchCV(clf,
                                   distributions,
                                   random_state=0,
                                   n_jobs=5,
                                   resource="max_iter",
                                   max_resources=max_resources)
    search = clf_cv.fit(X.values)
    clf = clf_cv.best_estimator_
elif do_search == "bohb":
    distributions = CS.ConfigurationSpace(seed=42)
Example #39
def run():
    directory = "results/{}".format(
        datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))
    if not os.path.exists(directory + '/trajectories/'):
        os.makedirs(directory + '/trajectories/')

    env = World(2)
    # env = MoveWorld()
    # env = MoveWorldContinuous()
    state = env.reset()
    model = EvolutionStrategies(inputs=env.state_dim, outputs=env.action_dim)

    experience = []
    log = []
    rewards = deque(maxlen=100)
    events = deque(maxlen=100)

    rewards_sat = deque(maxlen=100)
    events_sat = deque(maxlen=100)

    rewards_not_sat = deque(maxlen=100)
    events_not_sat = deque(maxlen=100)

    sats = []
    c_sats = deque(maxlen=100)
    p_sats = deque(maxlen=100)
    n_sat = 0
    c_sat_verification = 0

    for episode in range(params.episodes):
        reward, n_event, _ = run_episode(model, env)

        # update c_sat
        if params.constraint:
            sat = int(n_event <= params.constraint)
            if sat:
                rewards_sat.append(reward)
                events_sat.append(n_event)
            else:
                rewards_not_sat.append(reward)
                events_not_sat.append(n_event)
        else:
            sat = 1

        sats.append(sat)
        n_sat = sum(sats)  # += sat

        successes = n_sat + 1  # incl. prior
        failures = len(sats) - n_sat + 1  # incl. prior
        c_sat = 1. - beta(successes, failures).cdf(params.p_req)
        p_sat = beta(successes, failures).ppf(1 - params.c_req)
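        # reading of the Beta posterior over the satisfaction rate (assuming
        # p_req and c_req are the required rate and confidence): c_sat is the
        # posterior probability that the rate exceeds p_req, and p_sat is the
        # rate exceeded with posterior probability c_req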

        # direct
        if params.calibration == 'direct':
            model.c_sat = c_sat
        elif params.calibration == 'hard':
            model.c_sat = 0 if c_sat < params.c_req else 1
        elif params.calibration == 'soft':
            model.c_sat = max(0, c_sat - params.c_req) / (1 - params.c_req)
        elif params.calibration == 'naive':
            model.c_sat = max(
                0,
                np.mean(sats) - params.p_req) / (1 - params.p_req)

        # TODO: move to verify.py
        if params.verify and params.constraint is not None:
            if episode % 1000 == 0:
                # TODO: get true model: as method in evolution.py
                v_model = EvolutionStrategies(inputs=env.state_dim,
                                              outputs=env.action_dim)
                for i, param in enumerate(v_model.parameters()):
                    param.data = model.master_weights[i]
                _, _, c_sat_verification, _, _ = verify(v_model, env)
                print(c_sat_verification)

        if params.constraint:
            model.log_reward(reward, -1 * max(n_event - params.constraint, 0))
        else:
            model.log_reward(reward, 0)

        # log results
        rewards.append(reward)
        events.append(n_event)
        c_sats.append(c_sat)
        p_sats.append(p_sat)

        if episode % model.population_size == 0:
            log_entry = {
                'episode': episode,
                'reward': '{0:.2f}'.format(np.mean(rewards)),
                'r sat': '{0:.2f}'.format(np.mean(rewards_sat)),
                'r not sat': '{0:.2f}'.format(np.mean(rewards_not_sat)),
                'events': '{0:.4f}'.format(np.mean(events)),
                'e sat': '{0:.4f}'.format(np.mean(events_sat)),
                'e not sat': '{0:.4f}'.format(np.mean(events_not_sat)),
                'n_sat': '{0:.4f}'.format(np.mean(sats)),
                'c_sat': '{0:.4f}'.format(np.mean(c_sats)),
                'p_sat': '{0:.4f}'.format(np.mean(p_sats)),
                'c_sat_verification':
                '{0:.4f}'.format(np.mean(c_sat_verification)),
                'constraint': params.constraint,
                'calibration': params.calibration,
                'lr': params.learning_rate
            }
            log.append(log_entry)
            df = pd.DataFrame(log)
            df.to_csv(directory + '/log.csv')
            print(log_entry)

            if params.render:
                ImgRenderer(
                    directory + '/trajectories/' + str('%.4f' % reward) + '_' +
                    str('%.4f' % n_event) + '_' + str(episode),
                    env).render_img()
Example #40
def plot_loo_pit(
    idata=None,
    y=None,
    y_hat=None,
    log_weights=None,
    ecdf=False,
    ecdf_fill=True,
    n_unif=100,
    use_hdi=False,
    hdi_prob=None,
    figsize=None,
    textsize=None,
    labeller=None,
    color="C0",
    legend=True,
    ax=None,
    plot_kwargs=None,
    plot_unif_kwargs=None,
    hdi_kwargs=None,
    fill_kwargs=None,
    backend=None,
    backend_kwargs=None,
    show=None,
):
    """Plot Leave-One-Out (LOO) probability integral transformation (PIT) predictive checks.

    Parameters
    ----------
    idata : InferenceData
        InferenceData object.
    y : array, DataArray or str
        Observed data. If str, idata must be present and contain the observed data group
    y_hat : array, DataArray or str
        Posterior predictive samples for ``y``. It must have the same shape as y plus an
        extra dimension at the end of size n_samples (chains and draws stacked). If str or
        None, idata must contain the posterior predictive group. If None, y_hat is taken
        equal to y, thus, y must be str too.
    log_weights : array or DataArray
        Smoothed log_weights. It must have the same shape as ``y_hat``
    ecdf : bool, optional
        Plot the difference between the LOO-PIT Empirical Cumulative Distribution Function
        (ECDF) and the uniform CDF instead of LOO-PIT kde.
        In this case, instead of overlaying uniform distributions, the beta ``hdi_prob``
        around the theoretical uniform CDF is shown. This approximation only holds
        for large S and ECDF values not very close to 0 or 1. For more information, see
        `Vehtari et al. (2019)`, `Appendix G <https://avehtari.github.io/rhat_ess/rhat_ess.html>`_.
    ecdf_fill : bool, optional
        Use fill_between to mark the area inside the credible interval. Otherwise, plot the
        border lines.
    n_unif : int, optional
        Number of datasets to simulate and overlay from the uniform distribution.
    use_hdi : bool, optional
        Compute expected hdi values instead of overlaying the sampled uniform distributions.
    hdi_prob : float, optional
        Probability for the highest density interval. Works with ``use_hdi=True`` or ``ecdf=True``.
    figsize : figure size tuple, optional
        If None, size is (8 + numvars, 8 + numvars)
    textsize: int, optional
        Text size for labels. If None it will be autoscaled based on figsize.
    labeller : labeller instance, optional
        Class providing the method `make_pp_label` to generate the labels in the plot titles.
        Read the :ref:`label_guide` for more details and usage examples.
    color : str or array_like, optional
        Color of the LOO-PIT estimated pdf plot. If ``plot_unif_kwargs`` has no "color" key,
        a slightly lighter color than this argument will be used for the uniform kde lines.
        This will ensure that LOO-PIT kde and uniform kde have different default colors.
    legend : bool, optional
        Show the legend of the figure.
    ax: axes, optional
        Matplotlib axes or bokeh figures.
    plot_kwargs : dict, optional
        Additional keywords passed to ax.plot for LOO-PIT line (kde or ECDF)
    plot_unif_kwargs : dict, optional
        Additional keywords passed to ax.plot for overlaid uniform distributions or
        for beta credible interval lines if ``ecdf=True``
    hdi_kwargs : dict, optional
        Additional keywords passed to ax.axhspan
    fill_kwargs : dict, optional
        Additional kwargs passed to ax.fill_between
    backend: str, optional
        Select plotting backend {"matplotlib","bokeh"}. Default "matplotlib".
    backend_kwargs: bool, optional
        These are kwargs specific to the backend being used. For additional documentation
        check the plotting method of the backend.
    show : bool, optional
        Call backend show function.

    Returns
    -------
    axes : matplotlib axes or bokeh figures

    References
    ----------
    * Gabry et al. (2017) see https://arxiv.org/abs/1709.01449
    * https://mc-stan.org/bayesplot/reference/PPC-loo.html
    * Gelman et al. BDA (2014) Section 6.3

    Examples
    --------
    Plot LOO-PIT predictive checks overlaying the KDE of the LOO-PIT values to several
    realizations of uniform variable sampling with the same number of observations.

    .. plot::
        :context: close-figs

        >>> import arviz as az
        >>> idata = az.load_arviz_data("radon")
        >>> az.plot_loo_pit(idata=idata, y="y")

    Fill the area containing the 94% highest density interval of the difference between uniform
    variables empirical CDF and the real uniform CDF. A LOO-PIT ECDF clearly outside of these
    theoretical boundaries indicates that the observations and the posterior predictive
    samples do not follow the same distribution.

    .. plot::
        :context: close-figs

        >>> az.plot_loo_pit(idata=idata, y="y", ecdf=True)

    """
    if ecdf and use_hdi:
        raise ValueError("use_hdi is incompatible with ecdf plot")

    if labeller is None:
        labeller = BaseLabeller()

    loo_pit = _loo_pit(idata=idata, y=y, y_hat=y_hat, log_weights=log_weights)
    loo_pit = loo_pit.flatten() if isinstance(
        loo_pit, np.ndarray) else loo_pit.values.flatten()

    loo_pit_ecdf = None
    unif_ecdf = None
    p975 = None
    p025 = None
    loo_pit_kde = None
    hdi_odds = None
    unif = None
    x_vals = None

    if hdi_prob is None:
        hdi_prob = rcParams["stats.hdi_prob"]
    else:
        if not 1 >= hdi_prob > 0:
            raise ValueError(
                "The value of hdi_prob should be in the interval (0, 1]")

    if ecdf:
        loo_pit.sort()
        n_data_points = loo_pit.size
        loo_pit_ecdf = np.arange(n_data_points) / n_data_points
        # ideal unnormalized ECDF of uniform distribution with n_data_points points
        # it is used interchangeably as x or p(u<x) because for u ~ U(0,1) they are equal
        unif_ecdf = np.arange(n_data_points + 1)
        p975 = stats.beta.ppf(0.5 + hdi_prob / 2, unif_ecdf + 1,
                              n_data_points - unif_ecdf + 1)
        p025 = stats.beta.ppf(0.5 - hdi_prob / 2, unif_ecdf + 1,
                              n_data_points - unif_ecdf + 1)
        unif_ecdf = unif_ecdf / n_data_points
    else:
        x_vals, loo_pit_kde = kde(loo_pit)

        unif = np.random.uniform(size=(n_unif, loo_pit.size))
        if use_hdi:
            n_obs = loo_pit.size
            hdi_ = stats.beta(n_obs / 2, n_obs / 2).ppf((1 - hdi_prob) / 2)
            hdi_odds = (hdi_ / (1 - hdi_), (1 - hdi_) / hdi_)

    loo_pit_kwargs = dict(
        ax=ax,
        figsize=figsize,
        ecdf=ecdf,
        loo_pit=loo_pit,
        loo_pit_ecdf=loo_pit_ecdf,
        unif_ecdf=unif_ecdf,
        p975=p975,
        p025=p025,
        fill_kwargs=fill_kwargs,
        ecdf_fill=ecdf_fill,
        use_hdi=use_hdi,
        x_vals=x_vals,
        hdi_kwargs=hdi_kwargs,
        hdi_odds=hdi_odds,
        n_unif=n_unif,
        unif=unif,
        plot_unif_kwargs=plot_unif_kwargs,
        loo_pit_kde=loo_pit_kde,
        textsize=textsize,
        labeller=labeller,
        color=color,
        legend=legend,
        y_hat=y_hat,
        y=y,
        hdi_prob=hdi_prob,
        plot_kwargs=plot_kwargs,
        backend_kwargs=backend_kwargs,
        show=show,
    )

    if backend is None:
        backend = rcParams["plot.backend"]
    backend = backend.lower()

    # TODO: Add backend kwargs
    plot = get_plotting_function("plot_loo_pit", "loopitplot", backend)
    axes = plot(**loo_pit_kwargs)

    return axes
Example #41
def get_prior(prior, verbose=False):

    prior_lst = []
    initv = []
    lb = []
    ub = []

    if verbose:
        print('Adding parameters to the prior distribution...')

    for pp in prior:

        dist = prior[str(pp)]

        if len(dist) == 3:
            initv.append(None)
            lb.append(None)
            ub.append(None)
            ptype = dist[0]
            pmean = dist[1]
            pstdd = dist[2]
        elif len(dist) == 6:
            if dist[0] == 'None':
                initv.append(None)
            else:
                initv.append(dist[0])
            lb.append(dist[1])
            ub.append(dist[2])
            ptype = dist[3]
            pmean = dist[4]
            pstdd = dist[5]
        else:
            raise NotImplementedError(
                'Shape of prior specification of %s is unclear (!=3 & !=6).' %
                pp)

        # simply make use of frozen distributions
        if str(ptype) == 'uniform':
            prior_lst.append(ss.uniform(loc=pmean, scale=pstdd - pmean))

        elif str(ptype) == 'normal':
            prior_lst.append(ss.norm(loc=pmean, scale=pstdd))

        elif str(ptype) == 'gamma':
            b = pstdd**2 / pmean
            a = pmean / b
            prior_lst.append(ss.gamma(a, scale=b))

        elif str(ptype) == 'beta':
            a = (1 - pmean) * pmean**2 / pstdd**2 - pmean
            b = a * (1 / pmean - 1)
            prior_lst.append(ss.beta(a=a, b=b))

        elif str(ptype) == 'inv_gamma':

            def targf(x):
                y0 = ss.invgamma(x[0], scale=x[1]).std() - pstdd
                y1 = ss.invgamma(x[0], scale=x[1]).mean() - pmean
                return np.array([y0, y1])

            ig_res = so.root(targf, np.array([4, 4]), method='lm')

            if ig_res['success'] and np.allclose(targf(ig_res['x']), 0):
                prior_lst.append(
                    ss.invgamma(ig_res['x'][0], scale=ig_res['x'][1]))
            else:
                raise ValueError(
                    'Can not find inverse gamma distribution with mean %s and std %s'
                    % (pmean, pstdd))

        elif str(ptype) == 'inv_gamma_dynare':
            s, nu = inv_gamma_spec(pmean, pstdd)
            ig = InvGammaDynare()(s, nu)
            # ig = ss.invgamma(nu/2, scale=s/2)
            prior_lst.append(ig)

        else:
            raise NotImplementedError(' Distribution *not* implemented: ',
                                      str(ptype))
        if verbose:
            if len(dist) == 3:
                print('  parameter %s as %s with mean %s and std/df %s...' %
                      (pp, ptype, pmean, pstdd))
            if len(dist) == 6:
                print(
                    '  parameter %s as %s (%s, %s). Init @ %s, with bounds (%s, %s)...'
                    % (pp, ptype, pmean, pstdd, dist[0], dist[1], dist[2]))

    return prior_lst, initv, (lb, ub)
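# A quick check of the beta moment matching used in the 'beta' branch above
# (a sketch; the pmean and pstdd values are illustrative).
import scipy.stats as ss

pmean, pstdd = 0.5, 0.1
a = (1 - pmean) * pmean**2 / pstdd**2 - pmean
b = a * (1 / pmean - 1)
print(ss.beta(a=a, b=b).mean(), ss.beta(a=a, b=b).std())  # reproduces 0.5 and 0.1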
tau_1 = .05  # 'Low' treatment effect
tau_2 = .1  # 'High' treatment effect
MDE_bar = [tau_1, tau_2, tau_2 - tau_1]

# Specify mean of village level adoption effect
p_v = .033

# Specify mean of individual level adoption effect
p_i = .017

# Set up distribution for mean village level adoption rates, which will be used
# later to simulate adoption decisions, but is needed now to estimate the
# variance of village level and individual level effects
alpha_v = p_v * 100  # First parameter of the beta distribution
beta_v = 100 - alpha_v  # Second parameter of the beta distribution
F_v = beta(alpha_v, beta_v)  # Full beta distribution

# Set up distribution for individual level adoption rates
alpha_i = p_i * 100
beta_i = 100 - alpha_i
F_i = beta(alpha_i, beta_i)

# Set number of observations to use for variance calculation
nvar = 100000

# Draw sample of individual level parameters
samp_i = F_i.rvs(size=(nvar, 1))

# Convert to Bernoulli random variables
samp_i = np.random.binomial(1, samp_i, size=(nvar, 1))
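
# Illustrative check (added): with the Beta(100*p, 100*(1-p)) parameterization
# above, the mean equals p and the implied std is roughly sqrt(p*(1-p)/101).
print(F_v.mean(), F_v.std())  # ~0.033, ~0.018
print(F_i.mean(), F_i.std())  # ~0.017, ~0.013
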
seed_selection_list = [2]

direction = 'python/data/example4/burnin_study/'

for seed_selection_strategy in seed_selection_list:
    for burnin in burn_in_list:

        # file-name
        filename = direction + 'mp_liebscher_N' + repr(N) + \
                '_Nsim' + repr(n_simulations) + '_b' + repr(burnin) + '_' + sampling_method + \
                '_sss' + repr(seed_selection_strategy)

        # parameters for beta-distribution
        p = 6.0
        q = 6.0
        beta_distr = scps.beta(p, q, loc=-2, scale=8)

        # transformation to/from U-space
        phi = lambda x: scps.norm.cdf(x)
        phi_inv = lambda x: scps.norm.ppf(x)

        #CDF     = lambda x: scps.beta.cdf(x, p, q)
        CDF = lambda x: beta_distr.cdf(x)
        #CDF_inv = lambda x: scps.beta.ppf(x, p, q)
        CDF_inv = lambda x: beta_distr.ppf(x)

        transform_U2X = lambda u: CDF_inv(phi(u))
        transform_X2U = lambda x: phi_inv(CDF(x))
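
        # Illustrative round-trip check (added): transform_X2U followed by
        # transform_U2X should reproduce the sample up to floating point error.
        x_check = beta_distr.rvs(5)
        assert np.allclose(transform_U2X(transform_X2U(x_check)), x_check)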

        # limit-state function
        z = lambda x: 8 * np.exp(-(x[0]**2 + x[1]**2)) + 2 * np.exp(-(
Example #44
0
#print(data)

with pm.Model() as model_h:
    alpha = pm.HalfCauchy('alpha', beta=10)
    beta = pm.HalfCauchy('beta', beta=10)
    theta = pm.Beta('theta', alpha, beta, shape=len(N_samples))
    y = pm.Bernoulli('y', p=theta[group_idx], observed=data)
    trace_h = pm.sample(2000)

chain_h = trace_h[200:]
pm.traceplot(chain_h)
pm.summary(chain_h)
plt.savefig('img314.png')
plt.clf()
print(chain_h)

x = np.linspace(0, 1, 100)
for i in np.random.randint(0, len(chain_h), size=100):
    pdf = stats.beta(chain_h['alpha'][i], chain_h['beta'][i]).pdf(x)
    plt.plot(x, pdf, 'g', alpha=0.5)

dist = stats.beta(chain_h['alpha'].mean(), chain_h['beta'].mean())
pdf = dist.pdf(x)
mode = x[np.argmax(pdf)]
mean = dist.moment(1)
plt.plot(x, pdf, label='mode={:.2f}\nmean={:.2f}'.format(mode, mean))

plt.legend(fontsize=14)
plt.xlabel(r'$\theta_{prior}$', fontsize=16)
plt.savefig('img315.png')
Example #45
0
    def mean(self):
        return self.successes / (self.successes + self.failures)


#%%
probabilities = [.28, .3, .32]
bandits = [Bandit(p) for p in probabilities]

pulls = [0, 0, 0]
wins = [0, 0, 0]

epsilon = 1

for i in range(1000):
    index = np.argmax([bandit.expectation() for bandit in bandits])

    result = bandits[index].pull()
    pulls[index] += 1
    if result == 1:
        wins[index] += 1
    bandits[index].update(result)

print(pulls)
print(wins)

#%%
n = 10000
A = scs.beta(bandits[0].successes, bandits[0].failures).rvs(n)
B = scs.beta(bandits[2].successes, bandits[2].failures).rvs(n)

print((B - .04 > A).mean())
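
# Illustrative extension (not in the original): the same posterior samples give
# the plain probability that the third bandit beats the first, plus a Monte
# Carlo 95% interval for the difference in success rates (numpy as np assumed).
diff = B - A
print((B > A).mean())
print(np.percentile(diff, [2.5, 97.5]))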
Example #46
0
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats

plt.style.use(["seaborn-paper"])

from pysim.information.entropy import marginal_entropy

seed = 123
np.random.seed(seed)

n_samples = 1_000
a = 5
b = 10

# initialize data distribution
data_dist1 = stats.gamma(a=a)
data_dist2 = stats.beta(a=a, b=b)

# get some samples
X1_samples = data_dist1.rvs(size=n_samples)[:, None]
X2_samples = data_dist2.rvs(size=n_samples)[:, None]
X_samples = np.hstack([X1_samples, X2_samples])

assert X_samples.shape[1] == 2

sns.jointplot(x=X_samples[:, 0], y=X_samples[:, 1])
plt.show()

# ===========================
# True Entropy
# ===========================
H1_true = data_dist1.entropy()
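
# Hedged continuation (the original snippet is truncated here): the closed-form
# entropy of the beta marginal, for comparison against whatever estimate
# marginal_entropy returns.
H2_true = data_dist2.entropy()
print(H1_true, H2_true)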
Example #47
0
from IPython.core.pylabtools import figsize
from matplotlib import pyplot as plt
from scipy import stats as st
import numpy as np

visit_A = 1300
visit_B = 1275

conversion_A = 120
conversion_B = 125

alpha = 1
beta = 1
n_samples = 1000

posterior_A = st.beta(alpha + conversion_A, beta + visit_A - conversion_A)
posterior_B = st.beta(alpha + conversion_B, beta + visit_B - conversion_B)
posterior_samples_A = posterior_A.rvs(n_samples)
posterior_samples_B = posterior_B.rvs(n_samples)

# probability that site A converts better than site B
print("{:.1%} chance of A site better than B".format(
    (posterior_samples_A > posterior_samples_B).mean()))
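
# Illustrative extension (not in the original): posterior of the relative uplift
# of B over A, summarized by its mean and a 95% credible interval.
uplift = posterior_samples_B / posterior_samples_A - 1
print(uplift.mean(), np.percentile(uplift, [2.5, 97.5]))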

figsize(12.5, 4)

#------------------------------------------------------------------
# Posterior Dist of A and B
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
Example #48
0
X_descriptive = pd.read_pickle(
    "./template/descriptive_stats/X_descriptive_relative_GB.pkl")
test.rename(columns={"Wingate": "Wattbike"}, inplace=True)
features = test.iloc[:, 0:18].columns.tolist()

target = test.iloc[:, 18:].columns.tolist()

test.dropna(inplace=True)
test.reset_index(drop=True, inplace=True)

corr_matrix = test.corr()

corr_matrix = corr_matrix.loc[target, features].T

n = 72
dist = ss.beta(n / 2 - 1, n / 2 - 1, loc=-1, scale=2)
p = 2 * dist.cdf(-abs(corr_matrix))

p = pd.DataFrame(p, columns=target, index=features)
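
# Quick check (illustrative, not part of the original analysis): the beta-based
# two-sided p-value used above matches the p-value scipy.stats.pearsonr
# computes, so the two should agree on toy data with n observations.
x_chk = ss.norm.rvs(size=n, random_state=0)
y_chk = ss.norm.rvs(size=n, random_state=1)
r_chk, p_chk = ss.pearsonr(x_chk, y_chk)
print(p_chk, 2 * dist.cdf(-abs(r_chk)))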

labels = corr_matrix.round(2).astype(str)
p_value = p
for i in labels:
    print(i)
    for index, value in labels[i].items():
        print(index, value)
        if p_value.loc[index, i] <= 0.01:
            labels.loc[index, i] = value + '**'
        elif p_value.loc[index, i] <= 0.05:
            labels.loc[index, i] = value + '*'
        else:
Example #49
0
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import bernoulli, beta

# Beta(a, b) parameters for specified mean and variance
beta_a = lambda mean, var: mean*(mean*(1-mean)/var-1)
beta_b = lambda mean, var: (1-mean)*(mean*(1-mean)/var-1)
Beta = lambda mean_p, var_p: beta(beta_a(mean_p, var_p), beta_b(mean_p, var_p))
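
# Quick check (illustrative): the helper above hits the requested moments,
# e.g. Beta(mean_p=0.2, var_p=0.01) has mean 0.2 and variance 0.01.
check = Beta(0.2, 0.01)
print(check.mean(), check.var())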

# Generate a data set of unemployement sequences given entry and exit distributions
def sample_data(N, T, P, Q):
    data = []
    rates = []
    for sample in range(N):
        p, q = P.rvs(), Q.rvs()
        data.extend([(sample, time, spell, timein, unemployed, event) for (time, spell, timein, unemployed, event) in sample_sequence(T, p, q)])
        rates.append((sample, p, q))
    data = pd.DataFrame(data, columns=['sample', 'time', 'spell', 'timein', 'unemployed', 'event'])
    rates = pd.DataFrame(rates, columns=['sample', 'entry', 'exit'])
    return(data, rates)

# Generate a single sequence of observations given entry and exit rates
def sample_sequence(T, enter, exit):
    history = []
    spell = 0
    timein = 1
    enter = 1e-6 if enter < 1e-6 else enter
    exit = 1e-6 if exit < 1e-6 else exit
    steady_state = enter / (enter + exit)
    unemployed = bernoulli.rvs(steady_state)
Example #50
0
def main(argv):
    # Get and parse the command line arguments
    image_loc, target_name = get_arguments(argv)

    # Read the input image
    img = cv2.imread(image_loc, 0)

    # Check if image exists or not
    if (img is None):
        print ("Cannot open {} image".format(image_loc))
        print ("Make sure you provide the correct image path")
        sys.exit(2)

    # Calculate the input images' histogram
    input_hist = cv2.calcHist([img], [0], None, [256], [0,256])

    # Normalize the input histogram
    total = sum(input_hist)
    input_hist /= total

    # Calculate the cumulative input histogram
    cum_input_hist = []
    cum = 0.0
    for i in range(len(input_hist)):
        cum += input_hist[i][0]
        cum_input_hist.append(cum)

    # Calculate the variance of the image
    input_img_var = np.var(img)

    # Calculate the variance of the image square
    input_img_sqr_var = np.var(img**2)

    # Calculate the target dist for diff dist's
    target_dist = []
    target_hist = None
    if (target_name == "uniform"):
        # Import the package of the target distribution
        from scipy.stats import uniform

        # Create uniform distribution object
        unif_dist = uniform(0, 246)

        # Calculate the target distribution
        for i in range(0, 246):
            x = unif_dist.pdf(i)
            target_dist.append(x)
        for i in range(246, 256):
            target_dist.append(0)

        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256,1))
        for i in range(0,256):
            target_hist[i][0] = target_dist[i]

    elif (target_name == "normal"):
        # Import the package of the target distribution
        from scipy.stats import norm

        # Create standard normal distribution object
        norm_dist = norm(0, 1)

        # Calculate the target distribution
        for i in range(0, 256):
            x = norm_dist.pdf(i/42.0 - 3)
            target_dist.append(x)

        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256,1))
        for i in range(0,256):
            target_hist[i][0] = target_dist[i]

        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total

    elif (target_name == "rayleigh"):
        # Import the package of the target distribution
        from scipy.stats import rayleigh

        # Create rayleigh distribution object
        rayleigh_dist = rayleigh(0.5)

        # Calculate the target distribution
        for i in range(0, 256):
            x = rayleigh_dist.pdf(i/128.0)
            target_dist.append(x)

        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256,1))
        for i in range(0,256):
            target_hist[i][0] = target_dist[i]

        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total

    elif (target_name == "gamma"):
        # Import the package of the target distribution
        from scipy.stats import gamma

        # Create gamma distribution object
        gamma_dist = gamma(0.5, 0, 1.0)

        # Calculate the target distribution
        target_dist.append(1)
        for i in range(1, 256):
            x = gamma_dist.pdf(i/256.0)
            target_dist.append(x)

        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256,1))
        for i in range(0,256):
            target_hist[i][0] = target_dist[i]

        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total

    elif (target_name == "weibull"):
        # Import the package of the target distribution
        from scipy.stats import weibull_min

        # Create weibull distribution object
        weibull_dist = weibull_min(c=1.4, scale=input_img_var)

        # Calculate the target distribution
        for i in range(0, 256):
            x = weibull_dist.pdf(i/256.0)
            target_dist.append(x)

        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256,1))
        for i in range(0,256):
            target_hist[i][0] = target_dist[i]

        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total

    elif (target_name == "beta1"):
        # Import the package of the target distribution
        from scipy.stats import beta

        # Create beta distribution object
        beta_dist = beta(0.5, 0.5)

        # Calculate the target distribution
        target_dist.append(6)
        for i in range(1, 255):
            x = beta_dist.pdf(i/256.0)
            target_dist.append(x)
        target_dist.append(6)

        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256,1))
        for i in range(0,256):
            target_hist[i][0] = target_dist[i]

        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total

    elif (target_name == "beta2"):
        # Import the package of the target distribution
        from scipy.stats import beta

        # Create beta distribution object
        beta_dist = beta(5, 1)

        # Calculate the target distribution
        for i in range(0, 255):
            x = beta_dist.pdf(i/256.0)
            target_dist.append(x)
        target_dist.append(6)

        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256,1))
        for i in range(0,256):
            target_hist[i][0] = target_dist[i]

        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total

    elif (target_name == "lognorm"):
        # Import the package of the target distribution
        from scipy.stats import lognorm

        # Create lognorm distribution object
        lognorm_dist = lognorm(1)

        # Calculate the target distribution
        for i in range(0, 256):
            x = lognorm_dist.pdf(i/100.0)
            target_dist.append(x)

        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256,1))
        for i in range(0,256):
            target_hist[i][0] = target_dist[i]

        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total

    elif (target_name == "laplace"):
        # Import the package of the target distribution
        from scipy.stats import laplace

        # Create lognorm distribution object
        laplace_dist = laplace(4)

        # Calculate the target distribution
        target_dist.append(0)
        for i in range(1, 256):
            x = laplace_dist.pdf(i/256.0)
            target_dist.append(x)

        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256,1))
        for i in range(0,256):
            target_hist[i][0] = target_dist[i]

        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total

    elif (target_name == "beta3"):
        # Import the package of the target distribution
        from scipy.stats import beta

        # Create beta distribution object
        beta_dist = beta(8, 2)

        # Calculate the target distribution
        for i in range(0, 255):
            x = beta_dist.pdf(i/256.0)
            target_dist.append(x)
        target_dist.append(0)

        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256,1))
        for i in range(0,256):
            target_hist[i][0] = target_dist[i]

        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total

    else: # Image itself is a target distribution case
        # Read the image
        target_dist = cv2.imread(target_name, 0)

        # Check if image is read or not
        if (target_dist is None):
            print ("{} is not a valid target name (or) image does not exist".format(target_name))
            print ("Make sure you give correct target name or correct target image location")
            sys.exit(2)

        # Create target histogram from the image
        target_hist = cv2.calcHist([target_dist], [0], None, [256], [0,256])

        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total

    # Calculate the cumulative target histogram
    cum_target_hist = []
    cum = 0.0
    for i in range(len(target_hist)):
        cum += target_hist[i][0]
        cum_target_hist.append(cum)

    # Obtain the mapping from the input hist to target hist
    lookup = {}
    for i in range(len(cum_input_hist)):
        min_val = abs(cum_target_hist[0] - cum_input_hist[i])
        min_j = 0

        for j in range(1, len(cum_target_hist)):
            val = abs(cum_target_hist[j] - cum_input_hist[i])
            if (val < min_val):
                min_val = val
                min_j = j

        lookup[i] = min_j
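
    # (Added, illustrative) the nearest-CDF search above can also be written as
    # one vectorized argmin over the 256x256 table of absolute differences;
    # lookup_vec is equivalent to the lookup dict built in the loop.
    cum_t = np.asarray(cum_target_hist)
    cum_i = np.asarray(cum_input_hist)
    lookup_vec = np.abs(cum_t[None, :] - cum_i[:, None]).argmin(axis=1)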

    # Create the transformed image using the img's pixel values and the lookup table
    trans_img = img.copy()
    for i in range(img.shape[0]):
        for j in range(img.shape[1]):
            trans_img[i][j] = lookup[img[i][j]]

    # Write the transformed image to a png file
    cv2.imwrite('images/transformed.png', trans_img)

    # Plot the input image and the target image in one plot
    input_img_resized = cv2.resize(img, (0,0), None, 0.25, 0.25)
    trans_img_resized = cv2.resize(trans_img, (0,0), None, 0.25, 0.25)
    numpy_horiz = np.hstack((input_img_resized, trans_img_resized))
    cv2.imshow('Input image ------------------------ Trans image', numpy_horiz)
    cv2.waitKey(25)

    # Calculate the transformed image's histogram
    trans_hist = cv2.calcHist([trans_img], [0], None, [256], [0,256])

    # Normalize the transformed image's histogram
    total = sum(trans_hist)
    trans_hist /= total

    # Convert cum_input_hist to matrix for plotting
    cum_input_hist_matrix = np.ndarray(shape=(256,1))
    for i in range(0,256):
        cum_input_hist_matrix[i][0] = cum_input_hist[i]

    # Calculate the cum transformed histogram for plotting
    cum_trans_hist = np.ndarray(shape=(256,1))
    cum = 0.0
    for i in range(0,256):
        cum += trans_hist[i][0]
        cum_trans_hist[i][0] = cum

    # Convert cum_target_hist to matrix for plotting
    cum_target_hist_matrix = np.ndarray(shape=(256,1))
    for i in range(0,256):
        cum_target_hist_matrix[i][0] = cum_target_hist[i]

    plt.subplot(2, 3, 1)
    plt.title('Original hist')
    plt.plot(input_hist)

    plt.subplot(2, 3, 2)
    plt.title('Original cdf')
    plt.plot(cum_input_hist_matrix)

    plt.subplot(2, 3, 3)
    plt.title('Target pdf')
    plt.plot(target_hist)

    plt.subplot(2, 3, 4)
    plt.title('Transformed hist')
    plt.plot(trans_hist)

    plt.subplot(2, 3, 5)
    plt.title('Transformed cdf')
    plt.plot(cum_trans_hist)

    plt.subplot(2, 3, 6)
    plt.title('Target cdf')
    plt.plot(cum_target_hist_matrix)
    plt.show()
Example #51
0
#--------------------------------------------------------------------------------------------------------------
from scipy.stats import beta

#fit beta to previous CTRs
prior_parameters = beta.fit(click_through_rates, floc=0, fscale=1)
prior_parameters 

#extract a,b from fit
prior_a, prior_b = prior_parameters[0:2]
prior_a
prior_b

#define the frozen prior distribution from the fitted parameters
prior_distribution = beta(prior_a, prior_b)
prior_distribution

#draw samples from the prior
prior_samples = prior_distribution.rvs(10000) #rvs(10000) draws 10000 pseudorandom samples
prior_samples

#get histogram of samples
fit_counts, bins = np.histogram(prior_samples, zero_to_one)
fit_counts

#normalize histogram
fit_counts = [float(x) / fit_counts.sum() for x in fit_counts]
fit_counts

#plot
Example #52
0
plt.colorbar()

# In[782]:

x = np.arange(0, .2, 0.0001)
cmap = list(plt.cm.tab10(list(range(len(machines)))))

plt.figure(figsize=(26, 14))

# plot 1
n_rounds = 0
en = Environment(machines, payouts, n_rounds)
tsa = ThompsonSampler(env=en)
plt.subplot(231)
for i in range(len(machines)):
    pdf = beta(tsa.a[i], tsa.b[i]).pdf(x)
    c = cmap[i]
    plt.plot(x, pdf, c=c, label=i, alpha=.6)
plt.title(f"Prior distribution for each variant (uniform between 0 and 1)")
plt.legend()

# plot 2
n_rounds = 500
en = Environment(machines, payouts, n_rounds)
tsa = ThompsonSampler(env=en)
en.run(agent=tsa)
plt.subplot(232)
for i in range(len(machines)):
    pdf = beta(tsa.a[i], tsa.b[i]).pdf(x)
    c = cmap[i]
    plt.plot(x, pdf, c=c, label=i, alpha=.6)
# SCIPY
# Anonymous functions
square = lambda x: x**2
square(2)

from scipy.integrate import quad

quad(lambda x: x**3, 0, 1)
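# quad returns a (value, error-estimate) pair; here the value is 0.25,
# since the integral of x**3 from 0 to 1 is 1/4.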

# A histogram and the shape of the distribution
import numpy as np
from scipy.stats import beta
import matplotlib.pyplot as plt

q = beta(5, 5)  # Beta(a, b), with a = b = 5
obs = q.rvs(2000)  # 2000 observations
grid = np.linspace(0.01, 0.99, 100)
fig, ax = plt.subplots()
ax.hist(obs, bins=40, density=True)
ax.plot(grid, q.pdf(grid), 'k-', linewidth=2)
fig.show()

##########################
## EXERCISE: Employment simulation
##########################
'''
Using US unemployment data, Hamilton [Ham05] estimated the stochastic matrix
P =
0.971 0.029 0
0.145 0.778 0.077
Example #54
0
import best_model
import feature_engineering_titanic
import pandas as pd
import scipy.stats as st
from xgboost import XGBClassifier

if __name__ == '__main__':

    x_train, y_train, x_test = feature_engineering_titanic.read_titanic()

    x_train = x_train.to_numpy()
    y_train = y_train.to_numpy()
    x_test = x_test.to_numpy()

    # split train validate
    # x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.3, random_state=0)

    # get best model
    one_to_left = st.beta(10, 1)
    from_zero_positive = st.expon(0, 50)

    params = {
        "n_estimators": st.randint(3, 40),
        "max_depth": st.randint(3, 10),
        "learning_rate": st.uniform(0.05, 0.4),
        "colsample_bytree": one_to_left,
        "subsample": one_to_left,
        "gamma": st.uniform(0, 10),
        'reg_alpha': from_zero_positive,
        "min_child_weight": from_zero_positive,
    }
    xgb_clf = XGBClassifier(nthreads=-1)

    best_xgb_model = best_model.get_best_model(x_train,
Example #55
0
24 * 60
sleep.max()
# So somebody slept 23.5 / 24 hours
# It's possible to fit this using a beta distribution
# Beta is only defined on [0, 1]
sleep.min()
# So we'll need to scale it
# All RV's in Scipy have parameters for shape, location and scale
stats.beta.fit?
stats.beta.fit(sleep, floc=0, fscale = 24 * 60)
# floc means 'fixed location'
bparams = stats.beta.fit(sleep, floc=0, fscale = 24 * 60)
# We know the shape and scale, so we'll fit using this knowledge
# This is the MLE for alpha and beta parameters
# Now we make a random variable
sbeta = stats.beta(*bparams)
sbeta
sbeta.interval(1)
sbeta.mean()
sleep.mean()

# So sbeta here is the beta distribution fitted to this data
# Let's plot it and make sure
# that it looks reasonable

x = np.linspace(0, 60*24)
h, edges = np.histogram(sleep, 30, density=True)
plt.bar(edges[:-1], h, width=np.diff(edges))
plt.plot(x, sbeta.pdf(x), linewidth=4, color='orange')

plt.clf()
Example #56
0
File: styles.py Project: znob/arviz
"""
Styles
======

_thumb: .8, .8
"""
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import arviz as az

x = np.linspace(0, 1, 100)
dist = stats.beta(2, 5).pdf(x)

style_list = [
    'default', ['default', 'arviz-colors'], 'arviz-darkgrid',
    'arviz-whitegrid', 'arviz-white'
]

fig = plt.figure(figsize=(12, 12))
for idx, style in enumerate(style_list):
    with az.style.context(style):
        ax = fig.add_subplot(3, 2, idx + 1, label=idx)
        for i in range(10):
            ax.plot(x, dist - i, f'C{i}', label=f'C{i}')
        ax.set_title(style)
        ax.set_xlabel('x')
        ax.set_ylabel('f(x)', rotation=0, labelpad=15)
        ax.legend(bbox_to_anchor=(1, 1))
plt.tight_layout()
Example #57
0
plt.axvline(mle, linestyle ="--")
line1, = plt.plot(possible_thetas, likelihoods)

bins = [x/100 for x in range(100)]
counts, bins = np.histogram(infections_rates, bins=bins)
counts = counts / counts.sum()
line2, = plt.plot(bins[:-1], counts)
plt.xlabel("Theta")
plt.title("Evidence vs Historical Infection Rates")
plt.legend((line1, line2), ('Likelihood of Theta with new evidence', 'Frequency of Theta in last 100 months')
           , loc = 'upper left')
plt.show()

# Model the data with a beta function
prior_a, prior_b = beta.fit(infections_rates, floc = 0, fscale = 1)[0:2] # Fit data to find a & b for the beta dist.
prior = beta(prior_a, prior_b)

prior_samples = prior.rvs(10000)  # Sample from the prior
beta_sample_counts, bins = np.histogram(prior_samples, bins)
total = beta_sample_counts.sum()
beta_sample_counts = [x / total for x in beta_sample_counts]

plt.figure(figsize=(10, 7))

line1, = plt.plot(bins[:-1], beta_sample_counts)

hist_rates, bins = np.histogram(infections_rates, bins)
total = hist_rates.sum()
hist_rates = [x/total for x in hist_rates]
line2, = plt.plot(bins[:-1], hist_rates)
Example #58
0
#plot Bayesian updates to beta function with prior 1.4, 2.3
from scipy.stats import beta
import numpy as np
import matplotlib.pyplot as plt 
x = np.linspace(0, 1, num = 100)
plt.plot(x, beta(1.4, 2.3).pdf(x), label = "Prior = beta(1.4,2.3)")
for i in range(1,11,2):
    plt.plot(x, beta(1.4+i,2.3).pdf(x), label = "After {} heads".format(i))
for i in range(1,6,2):
    plt.plot(x, beta(1.4+i*5+10,2.3).pdf(x), label = "After {} heads".format(i*5+10))   
plt.legend()
plt.title("Updates to prior distribution")
plt.show()
Example #59
0
from scipy import stats
from scipy import optimize as opt
from scipy.stats import beta, uniform  # beta and uniform distributions
import matplotlib.pyplot as plt
# %matplotlib inline

plt.style.use("ggplot")
np.random.seed(123)

# target distribution
a, b = 1.5, 2.0
x = np.linspace(beta.ppf(0.001, a, b), beta.ppf(0.999, a, b),
                100)  # 100 grid points between the 0.001 and 0.999 quantiles of the beta distribution
plt.plot(x, beta.pdf(x, a, b))

# find the x that maximizes the beta pdf above
f = beta(a=a, b=b).pdf
res = opt.fmin(lambda x: -f(x), 0.3)  # minimize -f(x) to turn the maximization into a minimization
y_max = f(res)

y_max

NMCS = 5000
x_mcs = uniform.rvs(size=NMCS)  # uniform.rvs: draw samples from the uniform distribution
r = uniform.rvs(size=NMCS) * y_max
accept = x_mcs[r <= f(x_mcs)]
plt.hist(accept, bins=30, rwidth=0.8, label="rejection sampling")
x = np.linspace(beta.ppf(0.001, a, b), beta.ppf(0.999, a, b), 100)
plt.plot(x, beta.pdf(x, a, b), label="Target dist")
plt.legend()
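
# Rough check (added): with a uniform proposal scaled to height y_max, the
# expected acceptance rate is 1 / y_max, because the target density integrates
# to 1 on [0, 1] while the proposal box has area y_max.
print(len(accept) / NMCS, 1.0 / y_max)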
Example #60
0
def main():

    from KDEpy import FFTKDE, NaiveKDE
    from KDEpy.binning import linear_binning
    import matplotlib.pyplot as plt
    from scipy import stats

    np.random.seed(123)
    dist = stats.lognorm(1, 1)
    plt.figure(figsize=(14, 6))

    kernel = 'triweight'

    N = 10**3
    data = dist.rvs(int(N))
    plt.scatter(data, np.zeros_like(data), marker='|')
    x, y = NaiveKDE(bw='silverman', kernel=kernel).fit(data)(2**10)
    plt.plot(x, y, label='FFTKDE')
    plt.plot(x, dist.pdf(x), label='True')

    # -----------------------------------------------------------------------
    # Adaptive
    alpha = 1.9
    bw = 'silverman'
    kde = NaiveKDE(kernel='epa', bw=bw)
    kde.fit(data)(x)

    #y = NaiveKDE(bw=kde.bw*lambda_i).fit(x, weights=binned_data*lambda_i)(x)
    #plt.plot(x, y + np.ones_like(x)*0.00, label='Adaptive')

    # The FFTKDE grid may be wrong, but the true density cannot be
    # smaller than (1/N) K(0) at a given point
    min_kde = (1 / int(N)) * kde.kernel(0)
    kde_data = np.maximum(min_kde, kde(data))
    kde_data = kde(data)
    bw = kde.bw * ((kde_data) / stats.mstats.gmean(kde_data))**-alpha
    print(np.min(kde(data)))
    print(stats.mstats.gmean(kde(data)))
    print(kde.bw, bw)
    #bw = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])/100
    plt.scatter(data, kde_data)
    y = NaiveKDE(bw=bw, kernel=kernel).fit(data, weights=None)(x)
    plt.plot(x,
             kde.bw * ((kde(x) + 0) / stats.mstats.gmean(kde_data))**-alpha,
             label='bw')
    plt.plot(x, y + np.ones_like(x) * 0.00, label='Adaptive')
    plt.ylim([0, 0.7])

    plt.legend()
    plt.show()

    print('-' * 32)

    # -----------------------------------------------------------------------
    # Mirror at bounds
    plt.figure(figsize=(14, 6))

    # Beta distribution, where x=0 is a hard lower bound
    dist = stats.beta(a=1.05, b=3, loc=0, scale=1)

    # Plot the normal KDE and the true density
    data = dist.rvs(10**2)
    plt.figure(figsize=(14, 6))
    kde = FFTKDE(bw='silverman', kernel='triweight')
    x, y = kde.fit(data)(2**10)
    plt.figure(figsize=(14, 6))
    plt.plot(x, dist.pdf(x), label='True')
    plt.plot(x, y, label='FFTKDE')
    plt.scatter(data, np.zeros_like(data), marker='|')
    print(np.min(data), np.max(data))

    data_transformed = np.log(data)
    plt.scatter(data_transformed, np.zeros_like(data_transformed), marker='|')
    kde = FFTKDE(bw='silverman', kernel='triweight')
    x, y = kde.fit(data_transformed)(2**10)
    plt.plot(x, y, label='FFTKDE - transformed')

    print(x)
    print(y)
    plt.plot(np.exp(x), 2 * np.exp(y) * (1 + y) - 2)

    plt.ylim([0, 3])
    plt.xlim([-1, 4])

    plt.legend()
    plt.show()

    # -------------------------------------------------------------------------
    # Data on a circle
    # Mixture of three well-separated normal components
    np.random.seed(123)

    dist1 = stats.norm(loc=0, scale=1)
    dist2 = stats.norm(loc=20, scale=1)
    dist3 = stats.norm(loc=40, scale=1)
    data = np.hstack([dist1.rvs(10**3), dist2.rvs(10**3), dist3.rvs(10**3)])

    plt.figure(figsize=(14, 6))
    x, y = FFTKDE(bw='silverman').fit(data)()
    plt.plot(x, (dist1.pdf(x) + dist2.pdf(x) + dist3.pdf(x)) / 3,
             label='True distribution')
    plt.plot(x, y, label="FFTKDE with Silverman's rule")

    y = FFTKDE(bw='ISJ').fit(data)(x)
    plt.plot(x, y, label="FFTKDE with Improved Sheather Jones (ISJ)")

    plt.legend()
    plt.show()