Example No. 1
def find_tail_point(w, min_height, ratio):
    """Find best fit area,
    that minimize std of gaussian approximation from waveform.
    find when waveform fall below area * ratio,
    and at least 2 sigma away from mean time.
    """
    if np.max(w) < min_height:
        return []

    # Quick fit to the waveform and locate tail position
    ix = np.arange(len(w))

    # Use center population to estimate mean and sigma
    cnt = w > 0.02*np.max(w)
    mean = np.sum(ix[cnt]*w[cnt])/np.sum(w[cnt])
    sigma = (np.sum((ix[cnt]-mean)**2*w[cnt])/np.sum(w[cnt]))**0.5

    # Use estimated mean and sigma to find amplitude
    amp = np.sum(w[cnt]*norm.pdf(ix[cnt], mean, sigma))/np.sum(norm.pdf(ix[cnt], mean, sigma)**2)

    # Define tail by waveform height drop below certain ratio of amplitude
    tail = np.where(w < ratio*amp)[0]
    tail = tail[(tail > mean+2*sigma) & (tail != len(w) - 1)]  # exclude the final sample

    if len(tail) > 0:
        return tail[:1]
    else:
        return []
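A minimal usage sketch (the snippet assumes `np` and `norm` are already imported; the synthetic Gaussian pulse below is only for illustration):

import numpy as np
from scipy.stats import norm

w = 100.0 * norm.pdf(np.arange(200), 80.0, 5.0)        # synthetic pulse centred on sample 80
tail = find_tail_point(w, min_height=1.0, ratio=0.05)
print(tail)  # first index past mean + 2*sigma where the waveform drops below ratio * amplitude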
Example No. 2
def theta(cflag,s,k,v,r,t):
    d1=(np.log(s/k)+(r+v*v/2)*t)/(v*np.sqrt(t))
    d2=d1-v*np.sqrt(t)
    if cflag:   # call
        return -s*norm.pdf(d1)*v/(2*np.sqrt(t)) - r*k*np.exp(-r*t)*norm.cdf(d2)
    else:       # put
        return -s*norm.pdf(d1)*v/(2*np.sqrt(t)) + r*k*np.exp(-r*t)*norm.cdf(-d2)
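A quick usage sketch (parameter values are arbitrary; numpy and scipy.stats.norm are assumed imported as in the snippet):

import numpy as np
from scipy.stats import norm

print(theta(True,  s=100.0, k=100.0, v=0.2, r=0.05, t=0.5))   # call theta (per year)
print(theta(False, s=100.0, k=100.0, v=0.2, r=0.05, t=0.5))   # put theta (per year)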
Example No. 3
File: fx.py Project: Anhmike/PyMVPA
def dual_gaussian(x,
                  amp1=1.0, mean1=0.0, std1=1.0,
                  amp2=1.0, mean2=0.0, std2=1.0):
    """Sum of two Gaussians.

    Parameters
    ----------
    x : array
      Function argument
    amp1: float
      Amplitude parameter of the first Gaussian
    mean1: float
      Mean parameter of the first Gaussian
    std1: float
      Standard deviation parameter of the first Gaussian
    amp2: float
      Amplitude parameter of the second Gaussian
    mean2: float
      Mean parameter of the second Gaussian
    std2: float
      Standard deviation parameter of the second Gaussian
    """
    from scipy.stats import norm
    if std1 <= 0 or std2 <= 0:
        return np.nan
    return (amp1 * norm.pdf(x, mean1, std1)) + (amp2 * norm.pdf(x, mean2, std2))
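A minimal evaluation sketch (numpy assumed imported as `np`, as the snippet requires):

import numpy as np

x = np.linspace(-5.0, 5.0, 201)
y = dual_gaussian(x, amp1=1.0, mean1=-1.0, std1=0.5, amp2=2.0, mean2=2.0, std2=1.0)
print(y.shape)  # (201,)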
Example No. 4
    def test_mlexplorer(self):

        x = np.arange(0,600,1.0)
        nb_samples = 100 # number of samples in our dataset

        # true partial spectra
        S_1 = norm.pdf(x,loc=200.,scale=130.)
        S_2 = norm.pdf(x,loc=400,scale=70)
        S_true = np.vstack((S_1,S_2))

        # nb_samples samples with random concentrations between 0 and 1
        C_ = np.random.rand(nb_samples)
        C_true = np.vstack((C_,(1-C_))).T

        # We make some observations with random noise
        Obs = np.dot(C_true,S_true) + np.random.randn(nb_samples,len(x))*1e-4

        # new observations
        C_new_ = np.random.rand(10) #10 samples with random concentrations between 0 and 1
        C_new_true = np.vstack((C_new_,(1-C_new_))).T

        noise_new = np.random.randn(len(x))*1e-4
        Obs_new = np.dot(C_new_true,S_true) + noise_new

        explo = rp.mlexplorer(Obs)

        # we just test that it runs for now
        explo.algorithm = 'NMF'
        explo.nb_compo = 2
        explo.test_size = 0.3
        explo.scaler = "MinMax"
        explo.fit()
        explo.refit()
Example No. 5
    def testGaussian2D(self):
        gauss2d = Gauss2D(0.5/np.pi, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0)

        self.assertEqual(gauss2d.integral(), 1.0)
        self.assertEqual(gauss2d.evaluate(0.0, 0.0), 0.5/np.pi)

        gauss2d.update_params(mu0=-78.9)
        self.assertEqual(gauss2d.evaluate(-78.9, 0.0), 0.5/np.pi)
        self.assertGreater(gauss2d.evaluate(-78.9, 0.0), gauss2d.evaluate(-78.9, 1.0))

        gauss2d.update_params(offset=1234.5)
        self.assertEqual(gauss2d.integral(), 1.0)

        # Test the actual fitting.
        mu0 = 40.0
        mu1 = 60.0
        sigma0 = 10.0
        sigma1 = 7.0
        x = np.arange(100)
        X0, X1 = np.meshgrid(x, x, indexing="ij")
        Z = norm.pdf((X0 - mu0) / sigma0) * norm.pdf((X1 - mu1) / sigma1)

        gauss2d.initial_guess(Z)
        gauss2d.fit(Z)
        self.assertAlmostEqual(gauss2d.param_dict["mu0"], mu0)
        self.assertAlmostEqual(gauss2d.param_dict["mu1"], mu1)
        self.assertAlmostEqual(gauss2d.param_dict["sigma0"], sigma0)
        self.assertAlmostEqual(gauss2d.param_dict["sigma1"], sigma1)
def analyze_pedestrian_queue(file_path):
    """Analyze the pedestrians created by the simulation."""

    destinations, speeds = np.loadtxt(file_path, delimiter=',', unpack=True)

    # Show Histograms of destination selections
    plt.hist(destinations, bins=len(np.unique(destinations)), density=True)
    plt.xlabel("Selected Destinations (IDs)")
    plt.ylabel("Normalized Frequency (PDF)")
    plt.grid(True)

    # Intended pedestrian speed distribution
    mu = 1.340
    sigma = 0.265
    pdf_range = np.arange(0, 2.5, 0.001)

    # Plot theoretical distribution
    plt.figure()
    plt.plot(pdf_range, norm.pdf(pdf_range, mu, sigma))
    plt.grid(True)
    plt.xlabel("Pedestrian Speeds (m/s)")
    plt.ylabel("PDF")

    # Plot the histogram of speeds with the theoretical distribution overlaid
    plt.figure()
    plt.hist(speeds, bins=int((2*len(speeds))**(1./3)), density=True)
    plt.grid(True)
    plt.xlabel("Pedestrian Speeds (m/s)")
    plt.ylabel("Normalized Frequency (PDF)")
    plt.plot(pdf_range, norm.pdf(pdf_range, mu, sigma), linewidth=2)
    plt.legend(["Theory", "Actual"])

    plt.show()  # Show the plots
Example No. 7
 def VOIfunc(self, n, pointNew, grad):
     xNew = pointNew
     nTraining = self._GP._numberTraining
     tempN = nTraining + n
     #  n=n-1
     vec = np.zeros(tempN)
     X = self._PointsHist
     for i in xrange(tempN):
         vec[i] = self._GP.muN(X[i, :], n)
     maxObs = np.max(vec)
     std = np.sqrt(self._GP.varN(xNew, n))
     muNew, gradMu = self._GP.muN(xNew, n, grad=True)
     Z = (muNew - maxObs) / std
     temp1 = (muNew - maxObs) * norm.cdf(Z) + std * norm.pdf(Z)
     if grad == False:
         return temp1
     var, gradVar = self._GP.varN(xNew, n, grad=True)
     gradstd = 0.5 * gradVar / std
     gradZ = ((std * gradMu) - (muNew - maxObs) * gradstd) / var
     temp10 = (
         gradMu * norm.cdf(Z)
         + (muNew - maxObs) * norm.pdf(Z) * gradZ
         + norm.pdf(Z) * gradstd
         + std * (norm.pdf(Z) * Z * (-1.0)) * gradZ
     )
     return temp1, temp10
Example No. 8
def plot_mcmc_samples(data, samples, true_clusters, x_min= -15, x_max=15, stepsize=0.001):
    # Plot the data
    plt.title('Observed Data Points\n{0} total'.format(len(data)))
    plt.hist(data, 20)
    plt.savefig('points.pdf')
    plt.clf()
    
    # Plot the mean and bands of size 2 stdevs for the samples.
    # Also plot the last sample from the MCMC chain.
    xvals = np.arange(x_min, x_max, stepsize)
    true_pdf = sum(norm.pdf(xvals, mean, stdev) for mean, stdev in true_clusters) / float(len(true_clusters))
    no_bands = np.zeros(len(xvals))
    sample_pdfs = np.array([sum([norm.pdf(xvals, mean, stdev) for mean, stdev in sample]) / float(len(sample)) for sample in samples])
    sample_means = sample_pdfs.mean(axis=0)
    sample_bands = sample_pdfs.std(axis=0)*2.
    last = sample_pdfs[-1]
    means = np.array([true_pdf, sample_means, last])
    bands = np.array([no_bands, sample_bands, no_bands])
    names = ['True PDF', 'Bayes\nEstimate\n(+/- 2 stdevs)', 'Last MCMC\nsample']
    mcmc_params = '{0} points, {1} iterations, {2} burn-in, {3} thin, {4} samples'.format(NUM_POINTS, ITERATIONS, BURN_IN, THIN, len(samples))
    dp_params = 'alpha={0}, mu0={1}, nu0={2}, a0={3}, b0={4}'.format(ALPHA, MU_0, NU_0, A_0, B_0)
    plot_noisy_means('Dirichlet Process Mixture Results', means, bands, names, xvals=xvals, xlabel='X', ylabel='Probability', subtitle='{0}\n{1}'.format(mcmc_params, dp_params))
    
    # Plot the distribution of clusters in the samples
    plot_cluster_distribution(samples)
Example No. 9
def get_sent_similarity(user_data):
	
	scores=[]

	#=====[ Creates counts for each sentiment score in 21 buckets of width 0.1 from -1 to 1  ]=====
	for data in user_data:
		user_score = [0]*21
		for tweet in data:
			score = int(float("%.1f" % tweet['score'])*10+10)
			user_score[score] += 1
		scores.append(user_score)

	#=====[ Forms normalized probability distributions for each user's sentiments  ]=====
	x = np.linspace(-1, 1, 100)
	
	mu, std = norm.fit(scores[0])
	p = norm.pdf(x, mu, std)
	mu, std = norm.fit(scores[1])
	p2 = norm.pdf(x,mu,std)
	
	#=====[ Takes Kullback-Leibler Divergence between probability distributions  ]=====
	similarity = float("%.5f" % scipy.stats.entropy(p,p2))

	#=====[ Converts similarity score to a percentage from 10 - 90 to display on compatibility spectrum  ]=====
	if similarity < 0.003: 
		return 90
	elif similarity > 0.07:
		return 10
	else:
		return int(10 + ((similarity*100)-1)/6.7*80)

def dist2weights_gauss(dist, max_r, max_w=1, min_w=1e-3, S=None, rescale=True):
    """Gaussian distance weighting.

    Parameters
    ----------
    dist: float or np.ndarray
        the distances to be transformed into weights.
    max_r: float
        maximum radius of the neighbourhood considered.
    max_w: int (default=1)
        maximum weight to be considered.
    min_w: float (default=1e-3)
        minimum weight to be considered.
    S: float or None (default=None)
        the scale magnitude.
    rescale: boolean (default=True)
        if re-scale the magnitude.

    Returns
    -------
    weights: np.ndarray, array_like, shape (num. of retrievable candidates)
        values of the weight of the neighs inferred from the distances.

    """
    if S is None:
        S = set_scale_gauss(max_r, max_w, min_w)
    if rescale:
        A = max_w/(norm.pdf(0, scale=S)-norm.pdf(max_r, scale=S))
        weights = A*norm.pdf(dist, scale=S)
    else:
        A = max_w/norm.pdf(0, scale=S)
        weights = A*norm.pdf(dist, scale=S)
    return weights
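A usage sketch; the scale S is passed explicitly here because set_scale_gauss is not shown in this snippet:

import numpy as np
from scipy.stats import norm

d = np.linspace(0.0, 5.0, 6)
print(dist2weights_gauss(d, max_r=5.0, max_w=1.0, S=2.0, rescale=True))  # weights decay with distance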
def show_cond():
    results = load_same_loc()
    tt = 8

    # Graph the histogram of activity at the center
    # of the room from cell 0
    acts = [x[0] for x in results]
    plt.subplot(2,1,1)
    count, bins, _ = plt.hist(acts,bins=40,normed=Normed)
    plt.title('Original dist. Pts: %i'%(len(acts),))
    xlm = plt.xlim()
    xs = np.linspace(xlm[0],xlm[1],1000)
    mn, std = gaussian(acts)
    plt.plot(xs,norm.pdf(xs,mn,std))

    # Graph the histogram of activity at the center
    # of the room from cell 1
    plt.subplot(2,1,2)
    nb = [x[1] for x in results]
    plt.hist(nb,bins=bins,normed=Normed)
    mn, std = gaussian(nb)
    plt.plot(xs,norm.pdf(xs,mn,std))
    plt.title('Side dist. Pts: %i'%(len(nb),))
    plt.xlim(xlm)

    mx_i = np.argmax(count)


    plt.figure()
    plt.subplot(2,1,1)
    nb = [x[1] for x in results]
    plt.hist(nb,bins=bins,normed=Normed)
    plt.title('Side dist. Pts: %i'%(len(nb),))
    mn, std = gaussian(nb)
    plt.plot(xs,norm.pdf(xs,mn,std))
    plt.xlim(xlm)

    plt.subplot(2,1,2)
    iss = get_iss(mx_i+1, bins, acts)
    acts_neighbor = [x[1] for x in results[iss]]
    plt.hist(acts_neighbor,bins=bins,normed=Normed)
    plt.title('Pts: %i'%(len(acts_neighbor)))
    nb = acts_neighbor
    mn, std = gaussian(nb)
    plt.plot(xs,norm.pdf(xs,mn,std))
    plt.xlim(xlm)

    plt.figure()
    for delt in range(-tt,tt):
        iss = get_iss(mx_i+delt, bins, acts)

        acts_neighbor = [x[1] for x in results[iss]]
        plt.subplot(2,tt,delt+tt+1)
        plt.hist(acts_neighbor,bins=bins,normed=Normed)
        nb = acts_neighbor
        mn, std = gaussian(nb)
        plt.plot(xs,norm.pdf(xs,mn,std))
        plt.title('Pts: %i'%(len(acts_neighbor)))
        plt.xlim(xlm)
    plt.show()
Example No. 12
def mcmc(N=1000, k={"t1":100, "t2":100, "t3":5}, x=[], v=[]):
    chute = {"t1":[10],"t2":[10],"t3":[0.01]}  # initial values ("chute" = initial guess)
    M = chute
    hiper = {"t1":[0,100],"t2":[0,100],"t3":[0.1,0.1]}  # hyperparameter values

    for i in range(N-1):
        for j in M.keys():
            if j == "t1" or j == "t2": 
                M[j].append( np.random.normal(loc = M[j][-1]-k[j]/100, scale = k[j], size = 1) )

                lista = [ [ M[l][-1] for l in M.keys()] , [ M[l][-1] if l!=j else M[l][-2] for l in M.keys() ] ]
                    
                t1 = norm.pdf(M[j][-1], loc = hiper[j][0], scale = hiper[j][1]) * L(x, v, lista[0]) * norm.pdf(M[j][-2], loc = M[j][-1]-k[j]/100, scale = k[j])
                t2 = norm.pdf(M[j][-2], loc = hiper[j][0], scale = hiper[j][1]) * L(x, v, lista[1]) * norm.pdf(M[j][-1], loc = M[j][-2]-k[j]/100, scale = k[j])
                
                teste = (t1/t2)
            else:
                M[j].append( np.random.gamma(shape = M[j][-1]*k[j], scale = k[j], size = 1) )
                lista = [ [ M[l][-1] for l in M.keys()] , [ M[l][-1] if l!=j else M[l][-2] for l in M.keys() ] ]
                t1 =  gamma.pdf(M[j][-1], a = hiper[j][0], scale = hiper[j][1]) * L(x, v, lista[0]) * gamma.pdf(M[j][-2], a = M[j][-1]*k[j], scale = k[j])
                t2 =  gamma.pdf(M[j][-2], a = hiper[j][0], scale = hiper[j][1]) * L(x, v, lista[1]) * gamma.pdf(M[j][-1], a = M[j][-2]*k[j], scale = k[j])          

                teste = (t1/t2)
                
            if (min(1 , teste) < np.random.uniform(low = 0, high = 1, size = 1) ) or (np.isinf(teste)) or (np.isnan(teste)) :  
                M[j][-1] = M[j][-2]
            
    return(M)
Example No. 13
def generate_random_traffic(size=200, date_start="2016/01/01", output_path= "simu.csv"):
    x = np.linspace(1, size, size)
    index = pd.date_range(date_start, periods=size, freq="1H")
    rs = np.random.RandomState()
    g = pd.Series(0, index=index)

    # gaussian mixture
    for i in rs.choice(x, size=size // 3, replace=False):
        spread = rs.uniform(1, 100)
        multiplicator = 5 * spread
        g += pd.Series(norm.pdf(x, i, spread) * multiplicator, index=index)

    # gaussian 24h seasonality
    seasonx = np.linspace(1, 24, 24)
    season_index = pd.date_range('01/01/2016', periods=24, freq="1H")
    season = pd.Series(0, index=season_index)
    for i in rs.choice(seasonx[6:-6], size=4, replace=True):
        spread = rs.uniform(1, 20)
        multiplicator = g.mean()
        s = pd.Series(norm.pdf(seasonx, i, spread) * multiplicator, index=season_index)
        season += s

    for i in range(0, len(g) // len(season)):
        g = g.add(season.shift(24 * i, freq="1H") * 3, fill_value=0)

    g = g.add(pd.Series(0, index=index).cumsum())
    g.to_csv(output_path)
Example No. 14
def simulate_stupidDPM(iter_num, M):
	# Generate mixture sample
	N = 1000
	mu = [0.0, 10.0, 3.0]

	components = np.random.choice(range(3), size = N, replace = True, p = [0.3, 0.5, 0.2])
	samples = [norm.rvs(size = 1, loc = mu[components[i]], scale = 1)[0] for i in range(N)]

	## Sample G from DP(M, G0)
	v = beta.rvs(a = 1.0, b = M, size = N)
	prob_vector = np.append(np.array(v[0]), v[1:] * np.cumprod(1.0 - v[:-1]))
	thetas = norm.rvs(size = N, loc = 1.0, scale = 1.0)

	### Initialize thetas
	thetas = np.random.choice(thetas, size = N, replace = True, p = prob_vector)

	### Start MCMC chain
	for i in xrange(iter_num):
		for j in xrange(N):
			theta_temp = np.append(thetas[:j], thetas[j+1:])
			p = np.append(norm.pdf(samples[j], loc = theta_temp, scale = 1.0), M * norm.pdf(samples[j], loc = 1.0, scale = np.sqrt(2.0)))
			p = p / sum(p)
			temp = np.random.choice(np.append(theta_temp, N), size = 1, replace = True, p = p)
			if (temp == N):
				thetas[j] = norm.rvs(size = 1, loc = 0.5 * (samples[j] + 1), scale = np.sqrt(0.5))
			else:
				thetas[j] = temp
		print(thetas)
	return {"thetas": thetas, "y": samples}
 def __init__(self, image=None, mesh_factor=14, density_distribution=None):
   super(CoordinateSolver, self).__init__()
   self.image = image
   self.height, self.width, _ = self.image.shape
   self.mesh_factor = mesh_factor
   self.height //= self.mesh_factor
   self.width //= self.mesh_factor
   self.image = self.image[:self.mesh_factor*self.height, :self.mesh_factor*self.width]
   if type(density_distribution) == np.ndarray:
     restricted_density = density_distribution[:self.mesh_factor*self.height, :self.mesh_factor*self.width]
     target_areas = restricted_density
     target_areas = target_areas[:-1, :-1]
   else:
     target_areas = np.indices((self.width-1, self.height-1)).T.astype('float32')
     target_areas = norm.pdf(target_areas[:, :, 0], self.width/2, self.width/5)\
                   *norm.pdf(target_areas[:, :, 1], self.height/2, self.height/5)
   target_areas /= sum(sum(target_areas))
   
   normalisation_factor = (self.height-1)*(self.width-1)
   target_areas_normalised = target_areas*normalisation_factor
   self.padded_targets = np.zeros([self.height+1, self.width+1])
   self.padded_targets[1:-1, 1:-1] = target_areas_normalised
   self.coordinates = np.indices((self.width, self.height)).T.astype('float32')
   self.total_error = (self.height-1)*(self.width-1)
   
   self.min_coords = self.coordinates.copy()
   self.areas = calculate_areas(self.coordinates)
   self.errors = np.zeros(self.padded_targets.shape)
   self.x_weights = np.ones([self.height*self.width, self.height + 1, self.width + 1])
   self.y_weights = np.ones([self.height*self.width, self.height + 1, self.width + 1])
   self.make_weights()
def get_beam_loc(ra,dec,utc,beam_a,beam_b,sn_a,sn_b):
    sn_a = float(sn_a)
    sn_b = float(sn_b)
    boresight = ephem.Equatorial(ra,dec)

    ra_a,dec_a,info = coords.radec_of_offset_fanbeam(boresight.ra,boresight.dec,beam_a,utc)
    ns_a,ew_a = info['fanbeam_nsew']
    lst = info['mol_curr'].sidereal_time()
    
    ra_b,dec_b,info = coords.radec_of_offset_fanbeam(boresight.ra,boresight.dec,beam_b,utc)
    ns_b,ew_b =info['fanbeam_nsew']
    
    ns = np.linspace(np.radians(-3.0),np.radians(3.0),size)
    ew = np.linspace(np.radians(-100/3600.0),np.radians(100/3600.0),size)
    ns,ew = np.meshgrid(ns,ew)
    
    beam_a_map = fanbeam_model(ns_a,ew_a,ns+ns_a,ew+ew_a)
    beam_b_map = fanbeam_model(ns_b,ew_b,ns+ns_a,ew+ew_a)

    ratio = sn_a/sn_b
    ratio_sigma = ratio * np.sqrt(1/sn_a**2 + 1/sn_b**2)
    
    ratio_map = beam_a_map/beam_b_map
    ns_fwhm = np.radians(2.0)
    prob_map = norm.pdf(ratio_map,ratio,ratio_sigma) * norm.pdf(ns,0.0,ns_fwhm/2.355)
    prob_map/=prob_map.max()
    
    ns = ns+ns_a
    ew = ew+ew_a
    best_ns = ns[(prob_map.max(axis=1).argmax(),prob_map.max(axis=0).argmax())]
    best_ew = ew[(prob_map.max(axis=1).argmax(),prob_map.max(axis=0).argmax())]
    ha,dec = coords.nsew_to_hadec(best_ns,best_ew)
    ra,dec = coords.radec_to_J2000(lst-ha,dec,utc)
    return ra,dec,ns,ew,prob_map,lst
Example No. 17
def camera_waveforms():
    camera = CameraGeometry.from_name("CHEC")

    n_pixels = camera.n_pixels
    n_samples = 96
    mid = n_samples // 2
    pulse_sigma = 6
    r_hi = np.random.RandomState(1)
    r_lo = np.random.RandomState(2)

    x = np.arange(n_samples)

    # Randomize times
    t_pulse_hi = r_hi.uniform(mid - 10, mid + 10, n_pixels)[:, np.newaxis]
    t_pulse_lo = r_lo.uniform(mid + 10, mid + 20, n_pixels)[:, np.newaxis]

    # Create pulses
    y_hi = norm.pdf(x, t_pulse_hi, pulse_sigma)
    y_lo = norm.pdf(x, t_pulse_lo, pulse_sigma)

    # Randomize amplitudes
    y_hi *= r_hi.uniform(100, 1000, n_pixels)[:, np.newaxis]
    y_lo *= r_lo.uniform(100, 1000, n_pixels)[:, np.newaxis]

    y = np.stack([y_hi, y_lo])

    return y, camera
Example No. 18
def model_mixture(separation, prevalence_of_1st):
    x_arr = np.linspace(-7, 7, 100)
    plt.title('%s std delta - %s%% - %s%%'% (separation, prevalence_of_1st*100, (1-prevalence_of_1st)*100))
    plt.plot(x_arr, prevalence_of_1st*norm.pdf(x_arr, loc=-separation/2.), 'r')
    plt.plot(x_arr, (1-prevalence_of_1st)*norm.pdf(x_arr, loc=separation/2.), 'b')
    plt.plot(x_arr, prevalence_of_1st*norm.pdf(x_arr, loc=-separation/2.)
             + (1-prevalence_of_1st)*norm.pdf(x_arr, loc=separation/2.), 'k')
Example No. 19
def predictive_recursion_fdr(z, sweeporder, grid_x, theta_guess, mu0 = 0.,
							sig0 = 1.0, nullprob = 1.0, decay = -0.67):
	gridsize = grid_x.shape[0]
	theta_subdens = deepcopy(theta_guess)
	pi0 = nullprob
	joint1 = np.zeros(gridsize)
	ftheta1 = np.zeros(gridsize)

	# Begin sweep through the data
	for i, k in enumerate(sweeporder):
		cc = (3. + i)**decay
		joint1 = norm.pdf(grid_x, loc=z[k] - mu0, scale=sig0) * theta_subdens
		m0 = pi0 * norm.pdf(z[k] - mu0, 0., sig0)
		m1 = trapezoid(grid_x, joint1)
		mmix = m0 + m1
		pi0 = (1. - cc) * pi0 + cc * (m0 / mmix)
		ftheta1 = joint1 / mmix
		theta_subdens = (1. - cc) * theta_subdens + cc * ftheta1

	# Now calculate marginal distribution along the grid points
	y_mix = np.zeros(gridsize)
	y_signal = np.zeros(gridsize)
	for i, x in enumerate(grid_x):
		joint1 = norm.pdf(grid_x, x - mu0, sig0) * theta_subdens
		m0 = pi0 * norm.pdf(x, mu0, sig0)
		m1 = trapezoid(grid_x, joint1)
		y_mix[i] = m0 + m1;
		y_signal[i] = m1 / (1. - pi0)

	return {'grid_x': grid_x,
            'sweeporder': sweeporder,
			'theta_subdens': theta_subdens,
			'pi0': pi0,
			'y_mix': y_mix,
			'y_signal': y_signal}
Example No. 20
	def forward_scaled(self, data):
		'''
		Alpha is the joint probability of observing all data and z
		alpha(z[n]) = p(x1...xn, z[n])

		Use scale parameter to prevent underflow
		'''
		data = data_raw[1]
		length = data.shape[0]

		# Convert data to standard deviations and find probabilities
		data_norm = (data[:,None] - emission_means) / np.sqrt(emission_variances)
		emission_pdf = norm.pdf(data_norm).T
		emission_pdf /= emission_pdf.sum(0)

		alpha = np.zeros([n_states, length], np.float)
		scale = np.zeros(length, np.float)

		# Initialize alpha
		# datum_norm = (data[0] - emission_means) / np.sqrt(emission_variances)
		alpha[:,0] = prior * norm.pdf(data_norm[0])
		scale[0] = 1./alpha[:,0].sum()
		alpha[:,0] *= scale[0]

		# Recursively update alpha
		# for i,d in enumerate(data[1:]):
		for t in xrange(1,length):
			for k in xrange(n_states):
			# 	# alpha[k] = np.sum( p(X|Z) * p(Z'|Z) * alpha[k])
				alpha[k,t] = np.sum( alpha[:,t-1] * transition_matrix[:,k]) * emission_pdf[k,t]
			scale[t] = 1./alpha[:,t].sum()
			alpha[:,t] *= scale[t]
def starloglik(para):
    res = para[0] * norm.pdf(dtemp[:, 17] + para[1], scale=1) + (1 - para[0]) * norm.pdf(
        dtemp[:, 17], loc=para[2], scale=para[3]
    )
    res[res == 0] = 10 ** (-200)
    res = -np.sum(np.log(res))
    return res
Example No. 22
def BlackScholes():
    data = {}
    S = float(request.args.get('price'))
    K = float(request.args.get('strike'))
    T = float(request.args.get('time'))
    R = float(request.args.get('rate'))
    V = float(request.args.get('vol'))

    d1 = (log(float(S)/K)+(R+V*V/2.)*T)/(V*sqrt(T))
    d2 = d1-V*sqrt(T)

    data['cPrice'] = S*norm.cdf(d1)-K*exp(-R*T)*norm.cdf(d2)
    data['pPrice'] = K*exp(-R*T)-S+data['cPrice']

    data['cDelta'] = norm.cdf(d1)
    data['cGamma'] = norm.pdf(d1)/(S*V*sqrt(T))
    data['cTheta'] = (-(S*V*norm.pdf(d1))/(2*sqrt(T))-R*K*exp(-R*T)*norm.cdf(d2))/365
    data['cVega'] = S*sqrt(T)*norm.pdf(d1)/100
    data['cRho'] = K*T*exp(-R*T)*norm.cdf(d2)/100

    data['pDelta'] = data['cDelta']-1
    data['pGamma'] = data['cGamma']
    data['pTheta'] = (-(S*V*norm.pdf(d1))/(2*sqrt(T))+R*K*exp(-R*T)*norm.cdf(-d2))/365
    data['pVega'] = data['cVega']
    data['pRho'] = -K*T*exp(-R*T)*norm.cdf(-d2)/100

    return json.dumps(data)
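Since the endpoint above depends on a live Flask request, here is a hedged standalone sketch of the same closed-form call/put prices (the helper name is hypothetical, not part of the snippet):

from math import exp, log, sqrt
from scipy.stats import norm

def bs_prices(S, K, T, R, V):
    d1 = (log(S / K) + (R + V * V / 2.0) * T) / (V * sqrt(T))
    d2 = d1 - V * sqrt(T)
    call = S * norm.cdf(d1) - K * exp(-R * T) * norm.cdf(d2)
    put = call - S + K * exp(-R * T)  # put-call parity, as used for pPrice above
    return call, put

print(bs_prices(100.0, 100.0, 1.0, 0.05, 0.2))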
Example No. 23
def obj_func1(params, price_history, vol,
					price_durations, y_bar):

	"""
	Objective function for optimization
	"""
	num_trades = len(price_history) - 1
	P_last = price_history[:-1]
	pdelta = np.array(price_history[1:]) - P_last

	K = len(price_durations) + 1
	informed_prices = params[-K:]	
	P_informed = abmtools.create_price_vector(informed_prices,
									 price_durations, num_trades)
	x = params[:-K]

	Sigma_0 = x[4]
	lfunc1 = lambda x: likelihood1(pdelta, vol,  
		x[0], x[1], x[2], x[3], P_informed, P_last, Sigma_0, y_bar) \
		+ np.log(norm.pdf(x[0], 0.5, 0.1)) \
		+ np.log(norm.pdf(x[1], 5e-2, 1e-1)) \
		+ np.log(norm.pdf(x[2], 100., 15.))  \
		+ np.log(norm.pdf(x[3], 0.02**2, 0.01)) \
		+ np.log(norm.pdf(x[4], 10, 5))

	return -lfunc1(x)
Example No. 24
def log_posterior_1(params0, price_history, vol,
					price_durations, y_bar):

	"""
	Threshold function for metropolis hastings. It is the the log of posterior
	distribution
	"""
	num_trades = len(price_history) - 1
	P_last = price_history[:-1]
	pdelta = np.array(price_history[1:]) - P_last

	K = len(price_durations) + 1
	informed_prices = params0[-K:]	
	P_informed = abmtools.create_price_vector(informed_prices,
									 price_durations, num_trades)
	x = params0[:-K]

	Sigma_0 = x[4]
	lfunc1 = lambda x: likelihood1(pdelta, vol,  
		x[0], x[1], x[2], x[3], P_informed, P_last, Sigma_0, y_bar) \
		+ np.log(norm.pdf(x[0], 0.3, 0.1)) \
		+ np.log(norm.pdf(x[1], 5e-2, 1e-1)) \
		+ np.log(norm.pdf(x[2], 80., 15.))  \
		+ np.log(norm.pdf(x[3], 0.02**2, 0.01)) \
		+ np.log(norm.pdf(x[4], 10, 5))

	return lfunc1(x)
Example No. 25
    def lnprob(self, theta):

        lp = 0

        # Covariance amplitude
        lp += self.ln_prior.lnprob(theta[0])

        # Lengthscales
        lp += self.tophat.lnprob(theta[1:self.n_ls + 1])

        # Prior for the Bayesian regression kernel
        pos = (self.n_ls + 1)
        end = (self.n_ls + self.n_lr + 1)
        lp += -np.sum((theta[pos:end]) ** 2 / 10.)

        # Env Noise
        #lp += self.ln_prior_env_noise.lnprob(theta[end])
        #lp += self.ln_prior_env_noise.lnprob(theta[end + 1])
        # alpha (log prior)
        lp += norm.logpdf(theta[end], loc=-7, scale=1)
        # beta (log prior)
        lp += norm.logpdf(theta[end + 1], loc=0.5, scale=1)

        # Noise
        lp += self.horseshoe.lnprob(theta[-1])

        return lp
Example No. 26
def windowScores3(seq, length):
	diffs = seq[1:] - seq[:-1]
	absDiffs = np.abs(diffs)
	diffs_2 = diffs[1:] - diffs[:-1]
	absDiffs_2 = np.abs(diffs_2)

	# variance_diff = np.var(diffs)
	# expectedDiff = np.mean(absDiffs)
	expectedDiff = np.std(diffs)
	expectedDiffs_2 = absDiffs[:-1] + expectedDiff

	scores = np.zeros(seq.shape)
	actualDiffs = diffs[:-1]
	actualDiffs_2 = absDiffs_2

	# want (actual diff / expected diff) * (expected diff_2 / actual diff_2)
	firstProbs = norm.pdf(actualDiffs / expectedDiff)
	secondProbs = norm.pdf((actualDiffs_2 - expectedDiffs_2) / expectedDiff)
	# scores[1:-1] = (actualDiffs / expectedDiff) * (expectedDiffs_2 / actualDiffs_2)
	scores[1:-1] = firstProbs * secondProbs
	# scores[1:-1] = firstProbs
	# scores[1:-1] = secondProbs

	print describe(scores, 'scores')

	return scores
Example No. 27
 def theta(self, S, t, vol, r):
     if t > self.T:
         return np.zeros(len(S))
     tau = self.T - t
     decay = -vol * S * norm.pdf(self.d1(S, t, vol, r)) / (2 * np.sqrt(tau))
     discounted_strike = r * self.K * np.exp(-r * tau)
     if self.op_type == 'c':
         return decay - discounted_strike * norm.cdf(self.d2(S, t, vol, r))
     else:
         return decay + discounted_strike * norm.cdf(-self.d2(S, t, vol, r))
def plotEstimateVsN():
   """
   Estimate the mean of a Gaussian distribution, plotting the resulting distributions
   as the number of data points increase. The true variance is known in advance.
   """
   # Generate points from a Gaussian distribution
   mu = -0.8 # Mean
   var = 0.1 # Variance

   # Use mean of 0 for prior, and the true variance for both prior and likelihoods
   data = np.random.normal(mu, np.sqrt(var), 10)

   # Plot the prior distribution with mean 0 and true variance
   x = np.linspace(-1, 1, 100)
   plt.plot(x, norm.pdf(x, 0, np.sqrt(var)), label="N = 0")
   
   # Plot distribution as N gets larger
   for i in [1, 2, 10]:
      # Estimate the mean and variance from i data points
      mu, v = estimateBayes(data[:i], 0, var, var)

      # Plot the normal distribution curve
      plt.plot(x, norm.pdf(x, mu, np.sqrt(v)), label="N = {0}".format(i))

   plt.legend()
   plt.show()
Example No. 29
def P_number_true_obs_fast(args):
	E_true_links = 0
	p_at_least_one_bp_at_given_position = 1- P_breakpoints_in_interval(1, args.bp_ratio, 0)
	k = 2 * args.readlen / float(args.cov)
	
	for i in range((args.insertion_size + args.readlen - args.soft)+1, args.mean + 4*args.stddev):
		# When internal breakpoint occur within mean + 4*sigma
		E_true_links += 2*(1/k) * (v(i,args.insertion_size, args.readlen, args.soft) - 1 ) * norm.pdf(i, args.mean,args.stddev)*poisson.pmf(0,args.bp_ratio*i)*p_at_least_one_bp_at_given_position**2
		# when no breakpoint occurs on one side
		E_true_links += 2*(1/k) * 1                                                        * norm.pdf(i, args.mean,args.stddev)*poisson.pmf(0,args.bp_ratio*i)*p_at_least_one_bp_at_given_position
		# when no breakpoint occurs on both sides
		E_true_links += (1/k) * 1 															* norm.pdf(i, args.mean,args.stddev)*poisson.pmf(0,args.bp_ratio*i)

		#print v(i,args.insertion_size, args.readlen, args.soft)
	# when no breakpoint occurs on one side
	i = args.mean + 4*args.stddev
	E_true_links += 2*(1/k)*v(i,args.insertion_size, args.readlen, args.soft)*norm.pdf(i, args.mean,args.stddev)*poisson.pmf(0,args.bp_ratio*i)*p_at_least_one_bp_at_given_position

	# when no breakpoint occurs on both sides
	i = args.mean + 4*args.stddev
	print v(i,args.insertion_size, args.readlen, args.soft)
	print 1/k
	E_true_links += (1/k)*v(i,args.insertion_size, args.readlen, args.soft)*norm.pdf(i, args.mean,args.stddev)*poisson.pmf(0,args.bp_ratio*i)

 
	return E_true_links
def naive_bayes(data,classifier,sample):
    from scipy.stats import norm
    idx_male = array([i for i in range(len(classifier)) if classifier[i]==0])
    idx_female = array([i for i in range(len(classifier)) if classifier[i]==1])
    mean_male = mean(data[idx_male,:],0)
    std_male = std(data[idx_male,:],0)
    mean_female = mean(data[idx_female,:],0)
    std_female = std(data[idx_female,:],0)

    probs_female = []
    for i in range(len(mean_female)):
        probs_female.append( norm.pdf(sample[i],mean_female[i],std_female[i]))

    probs_male = []
    for i in range(len(mean_male)):
        probs_male.append( norm.pdf(sample[i],mean_male[i],std_male[i]))

    p_male = cumprod(probs_male)[-1] * 0.5
    p_female = cumprod(probs_female)[-1] * 0.5
    if p_male > p_female:
        print('The person is MALE, says: Naive Bayes')
        return 0
    else:
        print('The person is FEMALE, says: Naive Bayes')
        return 1
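A hedged usage sketch with synthetic data; the snippet assumes the bare numpy names (array, mean, std, cumprod) are in scope, e.g. via a star import:

import numpy as np
from numpy import array, mean, std, cumprod

rng = np.random.RandomState(0)
data = np.vstack([rng.normal([178.0, 80.0], 8.0, size=(50, 2)),   # synthetic "male" rows
                  rng.normal([165.0, 62.0], 7.0, size=(50, 2))])  # synthetic "female" rows
classifier = array([0] * 50 + [1] * 50)
naive_bayes(data, classifier, sample=[170.0, 65.0])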
def tsmaker(m, s, j):
    t = np.arange(0.0, 1.0, 0.01)
    v = norm.pdf(t, m, s) + j * np.random.randn(100)
    return ts.TimeSeries(v, t)
Example No. 32
def gfit(inp, sec_name):

    #Gaussian fit at a given energy

    sum_edep = inp[sec_name]

    nbins = 60

    plt.style.use("dark_background")
    col = "lime"
    #col = "black"

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    set_axes_color(ax, col)

    #data plot
    hx = plt.hist(sum_edep,
                  bins=nbins,
                  color="lime",
                  density=True,
                  label="edep")

    #bin centers for the fit
    centers = (0.5 * (hx[1][1:] + hx[1][:-1]))

    #pass1, fit over the full range
    fit_data = DataFrame({"E": centers, "density": hx[0]})
    pars, cov = curve_fit(lambda x, mu, sig: norm.pdf(x, loc=mu, scale=sig),
                          fit_data["E"], fit_data["density"])

    #print "pass1:", pars[0], pars[1]

    #pass2, fit in +/- 2*sigma range
    fitran = [pars[0] - 2. * pars[1],
              pars[0] + 2. * pars[1]]  # fit range at 2*sigma
    fit_data = fit_data[fit_data["E"].between(
        fitran[0], fitran[1], inclusive="neither")]  # select the data in the range
    pars, cov = curve_fit(lambda x, mu, sig: norm.pdf(x, loc=mu, scale=sig),
                          fit_data["E"], fit_data["density"])

    #print "pass2:", pars[0], pars[1]

    #fit function
    x = np.linspace(fitran[0], fitran[1], 300)
    y = norm.pdf(x, pars[0], pars[1])

    plt.plot(x, y, "k-", label="norm", color="red")

    ax.set_xlabel("Calorimeter signal (GeV)")
    ax.set_ylabel("Counts / {0:.3f} GeV".format(
        (plt.xlim()[1] - plt.xlim()[0]) / nbins))

    #ax.set_title(r"$\text{"+infile+"}$")
    #print infile

    set_grid(plt, col)

    #mean_str = "{0:.4f} \pm {1:.4f}".format(pars[0], np.sqrt(cov[0,0]))
    #sigma_str = "{0:.4f} \pm {1:.4f}".format(pars[1], np.sqrt(cov[1,1]))
    #res = pars[1]/pars[0]
    #res_str = "{0:.4f}".format(res)
    #fit_param = r"\begin{align*}\mu &= " + mean_str + r"\\ \sigma &= " + sigma_str + r"\\"
    #fit_param += r"\sigma/\mu &= " + res_str + r"\end{align*}"

    #plot legend
    #leg_items = [Line2D([0], [0], lw=2, color="red"), Line2D([0], [0], lw=0)]
    leg_items = [Line2D([0], [0], lw=0)]
    #plt.rc("text", usetex = True)
    #plt.rc('text.latex', preamble='\usepackage{amsmath}')
    #ax.legend(leg_items, ["Gaussian fit", fit_param])
    ax.legend(leg_items, [
        "{0:.4f}, {1:.4f}, {2:.4f}".format(pars[0], pars[1], pars[1] / pars[0])
    ])

    #output log
    #out = open("out.txt", "w")
    #out.write( "{0:.4f} +/- {1:.4f} | ".format(pars[0], np.sqrt(cov[0,0])) )
    #out.write( "{0:.4f} +/- {1:.4f} | ".format(pars[1], np.sqrt(cov[1,1])) )
    #out.write( "{0:.4f}".format(pars[1]/pars[0]) )
    #out.close()

    fig.savefig("01fig.pdf", bbox_inches="tight")
    plt.close()

    return pars[0], pars[1]
Example No. 33
def likelihood(x, vol):
    sd = np.exp(vol)
    prob = 0
    for i in norm.rvs(loc=mean_mu, scale=mean_sd, size=100):
        prob = prob + norm.pdf(x, i, sd) / 100
    return prob
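Usage sketch; mean_mu and mean_sd are module-level globals in the original, so they are set here only so the call runs:

import numpy as np
from scipy.stats import norm

mean_mu, mean_sd = 0.0, 1.0
print(likelihood(x=0.5, vol=np.log(0.8)))  # Monte Carlo estimate of p(x), averaging over sampled means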
Example No. 34
def pOfXpdf(x, feature):
    sigma = standardDeviation(feature)
    mean = sampleMean(feature)
    prob = norm.pdf(x, loc=mean, scale=sigma)
    return prob
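Sketch only: standardDeviation and sampleMean are helpers not shown in the snippet, so simple stand-ins are assumed here:

import numpy as np
from scipy.stats import norm

def sampleMean(feature):
    return np.mean(feature)

def standardDeviation(feature):
    return np.std(feature, ddof=1)

feature = np.random.normal(5.0, 2.0, size=200)
print(pOfXpdf(6.0, feature))  # normal density at x = 6 under the fitted mean and standard deviation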
        summ = sum(diff)
        # print ("SUM - ",summ)
        summ /= (n * h * 1.0)
        y.append(summ)

    # print ("Prazen " - y)
    return y


#Q-7-a
# getting alphas
X = np.arange(-5, 10, 0.1)
alpha = X
pr_dis = get_data()
# True distribution
Y0 = 0.25 * (norm.pdf(alpha, 0, 1) + norm.pdf(alpha, 3, 1) +
             norm.pdf(alpha, 6, 1) + norm.pdf(alpha, 9, 1))
# get kernel density estimator
Y = get_prazen(alpha, 0.1, pr_dis)
Y2 = get_prazen(alpha, 1, pr_dis)
Y3 = get_prazen(alpha, 7, pr_dis)

# plotting the graphs
plt.figure('Kernel Density Estimation', figsize=(15, 10))
plt.plot(X, Y0, label='True pdf')
plt.plot(X, Y, label='h = 0.1')
plt.plot(X, Y2, label='h = 1')
plt.plot(X, Y3, label='h = 7')

plt.xlabel('x', fontsize=18)
plt.ylabel('p(x)', fontsize=18)
Example No. 36
    def bayesian_calculation_update(self, sensor_inputs, time_frame, ITER):
        error_val_list = []
        mse_list = []
        self.create_error_lists(time_frame)
        err_names = []
        probs = []
        errors = []
        error_names = []
        self.search_helper({}, {}, errors, error_names)
        # need to make sure all the lists do not have conflicts
        errors, error_names = self.check_conflicts(errors, error_names)
        for i in range(len(errors)):
            err_name = tuple(
                set([
                    item for sublist in error_names[i].values()
                    for item in sublist
                ]))
            err_val = errors[i]
            full_err_name = error_names[i]
            if self.fft is True:
                for sensor in self.sensor_list:
                    err_val[sensor] = scipy.ifft(err_val[sensor])

            for key, value in self.sirens.items():
                if full_err_name[key] in value.keys():
                    data_size = self.stored_data[key][
                        full_err_name[key]][1].shape[0]
                    val = self.error_calculation(error_list=full_err_name[key],
                                                 sensor=key,
                                                 x=time_frame)
                    err_val[key] = (1 - 1 / np.log(data_size)) * val + (
                        1 / np.log(data_size)) * err_val[key]
                #if err_name == ('Error2',):
                #plt.clf()
                #plt.plot(time_frame, err_val[key], color= "red", label = "Estimated Line")
                #plt.plot(time_frame, sensor_inputs[key], color = "green", label = "Actual Line")
                #plt.xlabel("Timestamp")
                #plt.ylabel("Line Val")
                #plt.title(str(ITER))
                #plt.legend()
                #plt.grid()
                #plt.show()
            error_val_list.append(err_val)
            prob = 0.0
            mses = {}
            for j in self.sensor_list:
                for i in range(len(sensor_inputs[j])):
                    gaussian_err = norm.pdf(sensor_inputs[j][i],
                                            loc=err_val[j][i],
                                            scale=self.sigmas[j])
                    prob += np.log(gaussian_err)

                mse = ((sensor_inputs[j] - err_val[j])**2).mean()
                mses[j] = mse

            err_names.append(err_name)
            probs.append(prob)
            mse_list.append(mses)

        ind, error, pro, mse = self.find_most_probable_error(
            err_names, probs, mse_list)
        return error, pro, mse, error_val_list[ind]
Example No. 37
def energy_landscape_example():
    fig, [ax] = panel(1, 1, l_p=0, r_p=0, b_p=0, t_p=0, dpi=100)
    rectangle(-1000, 1000, -1000, 1000, fc='g', ec='m', lw=4)
    plt.axis('off')

    xlim([-1000, 1000])
    ylim([-1000, 1000])

    from scipy.stats import norm

    np.random.seed(584111)

    x = np.arange(-1000, 1000, 1)
    y = 50 * np.sin(0.02 * x) + 50 * np.sin(0.05 * x) - 300 * np.sin(
        0.007 * x) + 500
    for i in range(1, 11):
        y -= np.random.random() * 2000 * norm.pdf((-1000 + 300 * i + x) / 20)
    y[0], y[-1] = -1000, -1000
    ax.fill(x, y, c='m', alpha=0.2)
    ax.plot(x, y, c='m', alpha=0.5)

    pos = [-670]
    for i in pos:
        xn = np.argmax(i < x)
        ax.arrow(x[xn] + 30 + 100,
                 y[xn] + 30,
                 -50,
                 0,
                 width=15,
                 fc='grey',
                 ec='grey')
        ax.arrow(x[xn] - 30 - 100,
                 y[xn] + 30,
                 50,
                 0,
                 width=15,
                 fc='grey',
                 ec='grey')
        ellipse([x[xn], y[xn] + 30], 20, alpha=1, fc='b')

    pos = [320]
    for i in pos:
        xn = np.argmax(i < x)
        ax.arrow(x[xn] + 30, y[xn] + 30, 50, 0, width=15, fc='grey', ec='grey')
        ax.arrow(x[xn] - 30,
                 y[xn] + 30,
                 -50,
                 0,
                 width=15,
                 fc='grey',
                 ec='grey')
        ellipse([x[xn], y[xn] + 30], 20, alpha=1, fc='b')

    text(400, y[1400] - 100, r'\textbf{Global minima}', ha='center')
    text(250 + 100, y[1250] - 300, r'\textbf{Local minima}', ha='right')
    ax.arrow(
        225,
        y[1225] - 200,
        0,
        130,
        width=15,
        fc='k',
    )
    arrow(
        pos=[[-300, y[1250] - 300], [x[330], y[330]]],
        curve=-0.5,
    )
Example No. 38
sample_std = np.std(x_sample, ddof=1)

#------------------------------------------------------------
# Plot the sampled data
fig, ax = plt.subplots(figsize=(5, 3.75))

ax.hist(x_sample, 20, histtype='stepfilled', density=True, fc='#CCCCCC')
x = np.linspace(-2.1, 4.1, 1000)

factor1 = ratio_in / (1. + ratio_in)
factor2 = 1. / (1. + ratio_in)

ax.plot(x, gm.pdf(x), '-k', label='true distribution')
ax.plot(x, gm.pdf_individual(x), ':k')

ax.plot(x, norm.pdf(x, sample_mu, sample_std), '--k', label='best fit normal')

ax.legend(loc=1)

ax.set_xlim(-2.1, 4.1)

ax.set_xlabel('$x$')
ax.set_ylabel('$p(x)$')
ax.set_title('Input pdf and sampled data')
ax.text(0.95,
        0.80, ('$\mu_1 = 0;\ \sigma_1=0.3$\n'
               '$\mu_2=1;\ \sigma_2=1.0$\n'
               '$\mathrm{ratio}=1.5$'),
        transform=ax.transAxes,
        ha='right',
        va='top')
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title
plt.close('all')


plt.figure(1, figsize=(15, 7))
x = [(i + 1) * 0.1 for i in range(9)]
y = [norm.ppf(e) for e in x]
plt.subplot(121)
plt.plot(x, y, '-*b')
plt.ylabel('percentile', labelpad=18)
plt.xlabel(r'$\alpha$', labelpad=18)


# plot our distribution


xx = np.linspace(norm.ppf(0.01),
                norm.ppf(0.99), 100)
plt.figure(1)
plt.subplot(122)
plt.plot(xx, norm.pdf(xx),
       'r-', lw=2, alpha=0.6)
plt.ylim([0, 0.5])
for e in  y:
    print(norm.pdf(e))
    plt.axvline(e, 0, norm.pdf(e) / 0.5,
                color='black', alpha=0.5)
plt.ylabel('$f(x)$', labelpad=18)
plt.xlabel('$x$', labelpad=18)
plt.savefig('percentile_normal.png',
             bbox_inches='tight', dpi=300)
Example No. 40
    amp = np.max(px_spec_avg)
    new_dict[pixel] = amp
# try baseline subtraction (from Brad's Renishaw-Reniawesome)
base = baseline(px_spec[:,1], 4)
plt.plot(base)
base_sub = px_spec[:,1]-base
plt.plot(base_sub)

# gaussian fit try 1
popt, _ = optimize.curve_fit(gaussian, px_spec[:,0], px_spec[:,1])
plt.plot(px_spec[:,0], px_spec[:,1])
plt.plot(px_spec[:,0], gaussian(px_spec[:,0], *popt))

# gaussian fit try 2
mean,std=norm.fit(px_spec[:,1])
px_fit = norm.pdf(px_spec[:,0], mean, std)
plt.plot(px_spec[:,0], px_spec[:,1])
plt.plot(px_spec[:,0], px_fit)

# gaussian fit try 3
fitter = modeling.fitting.LevMarLSQFitter()
model = modeling.models.Gaussian1D()   # depending on the data you need to give some initial values
fitted_model = fitter(model, px_spec[:,0], px_spec[:,1])
plt.plot(px_spec[:,0], px_spec[:,1])
plt.plot(px_spec[:,0], fitted_model(px_spec[:,0]))

#pixel_spectrum_arr_5avg = moving_average(pixel_spectrum_arr[:,1], 5)

# copy dictionary key
# set value of copy dictionary key to max amplitude
# store this key-value pair in new dictionary
Example No. 41
 def get_emission(self, n, state):
     X = self.X
     phi = self.phi
     prior = norm.pdf(X[n], phi[state][0], phi[state][1])
     return (prior)
import os
figdir = os.path.join(os.environ["PYPROBML"], "figures")
def save_fig(fname): plt.savefig(os.path.join(figdir, fname))


from scipy.stats import t, laplace, norm

a = np.random.randn(30)
outliers = np.array([8, 8.75, 9.5])
plt.hist(a, 7, weights=[1 / 30] * 30, rwidth=0.8)

#fit without outliers
x = np.linspace(-5, 10, 500)

loc, scale = norm.fit(a)
n = norm.pdf(x, loc=loc, scale=scale)

loc, scale = laplace.fit(a)
l = laplace.pdf(x, loc=loc, scale=scale)

fd, loc, scale = t.fit(a)
s = t.pdf(x, fd, loc=loc, scale=scale)
plt.plot(x, n, 'k>',
        x, s, 'r-',
        x, l, 'b--')
plt.legend(('Gauss', 'Student', 'Laplace'))
save_fig('robustDemoNoOutliers.pdf')

#add the outliers
plt.figure()
plt.hist(a, 7, weights=[1 / 33] * 30, rwidth=0.8)
Example No. 43
def getNegLogProbNorm(Param):
    avg = Param[0]
    std = Param[1]
    getNegLogProbNorm = -np.sum(np.log(norm.pdf(x=Data, loc=avg, scale=std)))
    return getNegLogProbNorm
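A sketch of using the negative log-likelihood for a maximum-likelihood fit; `Data` is a module-level global in the original, so it is generated here only for illustration:

import numpy as np
from scipy.stats import norm
from scipy.optimize import minimize

Data = np.random.normal(2.0, 1.5, size=500)
res = minimize(getNegLogProbNorm, x0=[np.mean(Data), np.std(Data)], method="Nelder-Mead")
print(res.x)  # fitted (mean, std), close to (2.0, 1.5)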
Example No. 44
def editsolvent_removal2(solvent, y_data, x_data, picked_peaks, peak_regions, grouped_peaks, total_params, uc):
    picked_peaks = np.array(picked_peaks)

    # define the solvents

    if solvent == 'chloroform':

        exp_ppm = [7.26]

        Jv = [[]]

    elif solvent == 'dimethylsulfoxide':

        exp_ppm = [2.50]

        Jv = [[1.9, 1.9]]

    elif solvent == 'methanol':

        exp_ppm = [4.78, 3.31]

        Jv = [[], [1.7, 1.7]]

    elif solvent == 'benzene':

        exp_ppm = [7.16]

        Jv = [[]]

    elif solvent == 'pyridine':

        exp_ppm = [8.74, 7.58, 7.22]

        Jv = [[], [], []]

    else:
        exp_ppm = []
        Jv = [[]]

    # find picked peaks ppm values

    picked_peaks_ppm = x_data[picked_peaks]

    # make differences vector for referencing against multiple solvent peaks

    differences = []

    peaks_to_remove = []

    solvent_regions = []

    # now remove each peak in turn

    for ind1, speak_ppm in enumerate(exp_ppm):

        # if only a singlet is expected for this peak find solvent peak based on amplitude and position

        if len(Jv[ind1]) == 0:

            probs = norm.pdf(abs(picked_peaks_ppm - speak_ppm), loc=0, scale=0.1) * y_data[picked_peaks]

            # find the maximum probability

            w = np.argmax(probs)

            # append this to the list to remove

            peaks_to_remove.append(picked_peaks[w])

            # append this to the list of differences

            differences.append(speak_ppm - picked_peaks_ppm[w])

        # if the peak displays a splitting pattern then we have to be a bit more selective
        # do optimisation problem with projected peaks
        else:

            amp_res = []
            dist_res = []
            pos_res = []

            # limit the search to peaks +- 1 ppm either side

            srange = (picked_peaks_ppm > speak_ppm - 1) * (picked_peaks_ppm < speak_ppm + 1)

            for peak in picked_peaks_ppm[srange]:

                # print("picked ppm ", peak)

                fit_s_peaks, amp_vector, fit_s_y = new_first_order_peak(peak, Jv[ind1], np.arange(len(x_data)), 0.1, uc,
                                                                        1)

                diff_matrix = np.zeros((len(fit_s_peaks), len(picked_peaks)))

                for i, f in enumerate(fit_s_peaks):

                    for j, g in enumerate(picked_peaks):
                        diff_matrix[i, j] = abs(f - g)

                # minimise these distances

                vertical_ind, horizontal_ind = optimise(diff_matrix)

                closest_peaks = np.sort(picked_peaks[horizontal_ind])

                closest_amps = []

                for cpeak in closest_peaks:
                    closest_amps.append(total_params['A' + str(cpeak)])

                # find the amplitude residual between the closest peaks and the predicted pattern

                # normalise these amplitudes

                amp_vector = [i / max(amp_vector) for i in amp_vector]

                closest_amps = [i / max(closest_amps) for i in closest_amps]

                # append to the vector

                amp_res.append(sum([abs(amp_vector[i] - closest_amps[i]) for i in range(len(amp_vector))]))

                dist_res.append(np.sum(np.abs(closest_peaks - fit_s_peaks)))

                pos_res.append(norm.pdf(abs(peak - speak_ppm), loc=0, scale=0.5))

                # use the gsd data to find amplitudes of these peaks

            pos_res = [1 - i / max(pos_res) for i in pos_res]

            dist_res = [i / max(dist_res) for i in dist_res]

            amp_res = [i / max(amp_res) for i in amp_res]

            # calculate geometric mean of metrics for each peak

            g_mean = [(dist_res[i] + amp_res[i] + pos_res[i]) / 3 for i in range(0, len(amp_res))]

            # compare the residuals and find the minimum

            minres = np.argmin(g_mean)

            # append the closest peaks to the vector

            fit_s_peaks, amp_vector, fit_s_y = new_first_order_peak(picked_peaks_ppm[srange][minres], Jv[ind1],
                                                                    np.arange(len(x_data)), 0.1, uc, 1)

            diff_matrix = np.zeros((len(fit_s_peaks), len(picked_peaks)))

            for i, f in enumerate(fit_s_peaks):

                for j, g in enumerate(picked_peaks):
                    diff_matrix[i, j] = abs(f - g)

            # minimise these distances

            vertical_ind, horizontal_ind = optimise(diff_matrix)

            closest_peaks = np.sort(picked_peaks[horizontal_ind])

            for peak in closest_peaks:
                ind3 = np.abs(picked_peaks - peak).argmin()

                peaks_to_remove.append(picked_peaks[ind3])

                differences.append(picked_peaks_ppm[ind3] - uc.ppm(peak))

    # find the region this peak is in and append it to the list

    for peak in peaks_to_remove:

        for ind2, region in enumerate(peak_regions):

            if (peak > region[0]) & (peak < region[-1]):
                solvent_regions.append(ind2)
                break

    # now remove the selected peaks from the picked peaks list and grouped peaks

    w = np.searchsorted(picked_peaks, peaks_to_remove)

    picked_peaks = np.delete(picked_peaks, w)

    for ind4, peak in enumerate(peaks_to_remove):
        grouped_peaks[solvent_regions[ind4]] = np.delete(grouped_peaks[solvent_regions[ind4]],
                                                         np.where(grouped_peaks[solvent_regions[ind4]] == peak))

    # resimulate the solvent regions

    solvent_region_ind = sorted(list(set(solvent_regions)))

    # now need to reference the spectrum

    # differences = list of differences in ppm found_solvent_peaks - expected_solvent_peaks

    s_differences = sum(differences)

    x_data = x_data + s_differences

    return peak_regions, picked_peaks, grouped_peaks, x_data, solvent_region_ind
Example No. 45
#!/usr/bin/env python3
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
plt.style.use('seaborn-darkgrid')

# compute cumulative distribution function (CDF): P(-1.45 < Z < 1.45)
x = -1.45; y = 1.45; loc = 0; scale = 1
pls = norm.cdf(y, loc, scale) - norm.cdf(x, loc, scale)
print("Area under the standard normal curve P(-1.45 < Z < 1.45): {:.5f}".format(pls))

x1=np.linspace(x,y, 1000); y1=norm.pdf(x1,loc,scale)
x2=np.linspace(-4,4,1000); y2=norm.pdf(x2,loc,scale)
ax = plt.figure().add_subplot(111); ax.minorticks_on()
ax.plot(x2,y2,c='r',label='P(-1.45 < Z < 1.45) = {:.5f}'.format(pls))
ax.set(xlabel='X',ylabel='P(X)'); ax.legend(handlelength=0)
ax.fill_between(x1,y1,alpha=0.5,color='r'); ax.set(xlim=[-4,4])
plt.savefig('norm1f.pdf',dpi=72,bbox_inches='tight'); plt.show()
Example No. 46
def test_probit():

    for Y, T in [(np.random.binomial(1, 0.5, size=(10, )), np.ones(10)),
                 (np.random.binomial(1, 0.5, size=(10, )), None),
                 (np.random.binomial(3, 0.5, size=(10, )), 3 * np.ones(10))]:
        X = np.random.standard_normal((10, 5))

        for case_weights in [None, np.ones(10)]:
            L = glm.glm.probit(X, Y, trials=T, case_weights=case_weights)
            L.smooth_objective(np.zeros(L.shape), 'both')
            L.hessian(np.zeros(L.shape))

            sat_sub = L.saturated_loss.subsample(np.arange(
                5))  # check that subsample of saturated loss at least works
            sat_sub.smooth_objective(np.zeros(sat_sub.shape))

            # check that subsample is getting correct answer

            Xsub = X[np.arange(5)]
            Ysub = Y[np.arange(5)]
            if T is not None:
                Tsub = T[np.arange(5)]
                T_num = T
            else:
                Tsub = np.ones(5)
                T_num = np.ones(10)

            beta = np.ones(L.shape)

            if case_weights is not None:
                Lsub2 = glm.glm.probit(Xsub,
                                       Ysub,
                                       trials=Tsub,
                                       case_weights=case_weights[np.arange(5)])
                Lsub3 = glm.glm.probit(Xsub,
                                       Ysub,
                                       trials=Tsub,
                                       case_weights=case_weights[np.arange(5)])
                case_cp = case_weights.copy() * 0
                case_cp[np.arange(5)] = 1
                Lsub4 = glm.glm.probit(X, Y, trials=T, case_weights=case_cp)
            else:
                Lsub2 = glm.glm.probit(Xsub, Ysub, trials=Tsub)
                Lsub3 = glm.glm.probit(Xsub, Ysub, trials=Tsub)

            Lsub3.coef *= 2.

            f2, g2 = Lsub2.smooth_objective(beta, 'both')
            f3, g3 = Lsub3.smooth_objective(beta, 'both')
            f4, g4 = Lsub2.smooth_objective(beta, 'both')

            np.testing.assert_allclose(f3, 2 * f2)
            np.testing.assert_allclose(g3, 2 * g2)

            np.testing.assert_allclose(f2, f4)
            np.testing.assert_allclose(g2, g4)

            Lcp = copy(L)
            prev_value = L.smooth_objective(np.zeros(L.shape), 'func')
            L_sub = L.subsample(np.arange(5))
            L_sub.coef *= 45
            new_value = L.smooth_objective(np.zeros(L.shape), 'func')
            assert (prev_value == new_value)

            np.testing.assert_allclose(L_sub.gradient(beta),
                                       45 * Lsub2.gradient(beta))

            linpred = X.dot(beta)
            np.testing.assert_allclose(
                L.gradient(beta),
                X.T.dot(-normal_dbn.pdf(linpred) *
                        (Y / normal_dbn.cdf(linpred) -
                         (T_num - Y) / normal_dbn.sf(linpred))))

            linpred = Xsub.dot(beta)
            np.testing.assert_allclose(
                L_sub.gradient(beta),
                45 * Xsub.T.dot(-normal_dbn.pdf(linpred) *
                                (Ysub / normal_dbn.cdf(linpred) -
                                 (Tsub - Ysub) / normal_dbn.sf(linpred))))

            # other checks on gradient

            if T is None:
                sat = L.saturated_loss
                np.testing.assert_allclose(
                    sat.smooth_objective(np.zeros(sat.shape), 'grad'),
                    (0.5 - Y) * normal_dbn.pdf(0) / 0.25)
                np.testing.assert_allclose(
                    L.gradient(np.zeros(L.shape)),
                    X.T.dot(0.5 - Y) * normal_dbn.pdf(0) / 0.25)
                np.testing.assert_allclose(
                    L.hessian(np.zeros(L.shape)),
                    X.T.dot(X) / 0.25 * normal_dbn.pdf(0)**2)
            else:
                L.gradient(np.zeros(L.shape))
                L.hessian(np.zeros(L.shape))

            L.objective(np.zeros(L.shape))
            L.latexify()

            L.saturated_loss.data = (Y, T)
            L.saturated_loss.data

            L.data = (X, (Y, T))
            L.data
Example No. 47
from scipy.stats  import norm
from scipy.special import comb
from scipy.stats import beta
from scipy import integrate
import matplotlib.pyplot as plt
import numpy 
import math
import pylab 
alpha=[5,3,1]   # define alpha pars for inverse gamma dist
betaa=[5,15,30] # define beta pars for inverse gamma dist
y_value=[10,3,3] # number of heads
N=[20,10,10]    #number of tosses 
x=numpy.linspace(-0.25,1,100)  # grid for r (r cannot be negative; negative values are included only for visual purposes)
for i in range( len(alpha)):
    a=' Inverse Gamma :alpha={}   beta={}'.format(alpha[i],betaa[i])
    b=' Laplace Approx :alpha={}   beta={}'.format(alpha[i],betaa[i])
    real_dist= lambda x : comb(N[i],y_value[i])*(x**y_value[i])*((1-x)**(N[i]-y_value[i]))*beta.pdf(x,alpha[i],betaa[i])  # prior * likelihood
    marginal_liklihood=integrate.quad(real_dist, x[0], x[-1])[0]  # calculate the marginal likelihood (i.e. the normalization step)
    map_r=(y_value[i]+alpha[i]-1)/(alpha[i]+N[i]+betaa[i]-2)  # MAP for r
    print('r={}'.format(map_r))
    g_2d=((-alpha[i]-y_value[i]+1)/(map_r**2))+((-betaa[i]-N[i]+y_value[i]+1)/(1-map_r)**2)  # calculating hessian matrix
    variance_laplace=-(1/g_2d)
    plt.plot(x,norm.pdf(x,map_r, math.sqrt(variance_laplace)),label=b)
    print('sigma={}'.format(math.sqrt(variance_laplace)))
    plt.plot(x,real_dist(x)/marginal_liklihood,label=a)
    pylab.legend(loc='upper right')
    #normal_pdf=norm.pdf(x,map_r, math.sqrt(variance_laplace))
   # real_dist=comb(N[i],y_value[i])*(x**y_value[i])*((1-x)**(N[i]-y_value[i]))*beta.pdf(x,alpha[i],betaa[i])
plt.xlabel('r value')    
plt.ylabel('p(r)')  
Example No. 48
 def out(w1, w2):
     err = y0 - (w1 + w2 * x0)
     return norm.pdf(err, loc=0, scale=likelihoodSD)
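Sketch: x0, y0 and likelihoodSD are globals in the original, so they are assumed here just to show the call:

from scipy.stats import norm

x0, y0, likelihoodSD = 2.0, 5.0, 1.0
print(out(1.0, 2.0))  # density of the residual y0 - (w1 + w2*x0) under N(0, likelihoodSD)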
Example No. 49
fig, ax = plt.subplots()
ax.plot(x, y, 'r-', linewidth=2, label='sine function', alpha=0.9)
ax.legend(loc='upper center')
plt.show()


fig, ax = plt.subplots()
ax.plot(x, y, 'r-', linewidth=2, label=r'$y=\sin(x)$', alpha=0.6)
ax.legend(loc='upper center')
ax.set_yticks([-1,-.5, 0, .5, 1])
plt.title('Test plot')
plt.show()



## Multiple plots on one axis

from scipy.stats import norm
from random import uniform

fig, ax = plt.subplots()
x = np.linspace(-4, 4, 150)
for i in range(3):
    print(i)
    m, s = uniform(-1, 1), uniform(1, 2)
    y = norm.pdf(x, loc=m, scale=s)
    current_label = rf'$\mu = {m:.2}$'
    ax.plot(x, y, linewidth=2, alpha=0.6, label=current_label)
ax.legend()
plt.show()
Esempio n. 50
0
    def precompute_sensor_model(self):
        print "Precomputing sensor model"
        table_width = int(self.MAX_RANGE_PX) + 1

        #meshgrid of data
        (x, y) = np.meshgrid(np.linspace(0, self.MAX_RANGE_PX, table_width),
                             np.linspace(0, self.MAX_RANGE_PX, table_width))
        #normal along identity
        z = 2 * norm.pdf(x, y, 5)
        #uniform
        z += 2.0 / self.MAX_RANGE_PX
        #ramp
        for row in xrange(table_width):
            z[row][0:row] += .01 - .01 * np.arange(row, dtype=np.float32) / row
        #max_dist
        z[:, -1:] = .3
        #normalize each row so every ground-truth distance gives a proper distribution
        z /= z.sum(axis=1, keepdims=True)
        #transpose and save; ascontiguousarray keeps the C extension (RangeLib) happy
        self.sensor_model_table = np.ascontiguousarray(z.T)
        # upload the sensor model to RangeLib for ultra fast resolution later
        self.range_method.set_sensor_model(self.sensor_model_table)

        # code to generate visualizations of the sensor model
        if False:
            # visualize the sensor model
            fig = plt.figure()
            ax = fig.gca(projection='3d')

            # Make data.
            X = np.arange(0, table_width, 1.0)
            Y = np.arange(0, table_width, 1.0)
            X, Y = np.meshgrid(X, Y)

            # Plot the surface.
            surf = ax.plot_surface(X,
                                   Y,
                                   self.sensor_model_table,
                                   cmap="bone",
                                   rstride=2,
                                   cstride=2,
                                   linewidth=0,
                                   antialiased=True)

            ax.text2D(0.05,
                      0.95,
                      "Precomputed Sensor Model",
                      transform=ax.transAxes)
            ax.set_xlabel('Ground truth distance (in px)')
            ax.set_ylabel('Measured Distance (in px)')
            ax.set_zlabel('P(Measured Distance | Ground Truth)')

            plt.show()
        elif False:
            plt.imshow(self.sensor_model_table * 255, cmap="gray")
            plt.show()
        elif False:
            plt.plot(self.sensor_model_table[:, 140])
            plt.plot([139, 139], [0.0, 0.08], label="test")
            plt.ylim(0.0, 0.08)
            plt.xlabel("Measured Distance (in px)")
            plt.ylabel("P(Measured Distance | Ground Truth Distance = 140px)")
            plt.show()
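Not part of the original class: a small hedged sanity check that the normalized table behaves as a conditional distribution (after the row-wise normalization of z, each ground-truth column of the transposed table should sum to about 1):

import numpy as np

def check_sensor_model(table):
    # table is z.T, so each column corresponds to one ground-truth distance
    col_sums = table.sum(axis=0)
    assert np.allclose(col_sums, 1.0, atol=1e-6), col_sums

# usage (hypothetical): check_sensor_model(self.sensor_model_table)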
Esempio n. 51
0
def prob(val, mu, sig, lam):
    p = lam
    for i in range(len(val)):
        p *= norm.pdf(val[i], mu[i], sig[i][i])
    return p
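An equivalent vectorized form may be clearer; this sketch assumes, as the loop above does, that `sig` is a square matrix whose diagonal entries are the per-dimension standard deviations:

import numpy as np
from scipy.stats import norm

def prob_vectorized(val, mu, sig, lam):
    # product of independent 1-D Gaussian densities, scaled by the weight lam
    return lam * np.prod(norm.pdf(np.asarray(val), np.asarray(mu), np.diag(np.asarray(sig))))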
Esempio n. 52
0
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


nT = 30  # Temporal filter length
nX = 40  # Spatial filter length

N = 30  # Number of neurons

temp_filt = np.zeros((N, nT))
spat_filt = np.zeros((N, nX))

# Define filters manually
for i in range(N - 15):
    temp_filt[i, :] = norm.pdf(np.linspace(-15, 15, nT),
                               loc=i - 12.5 + i / 1.5,
                               scale=.8)
    spat_filt[i, :] = norm.pdf(
        np.linspace(-10, 10, nX), i - 7.5,
        scale=.8) - .8 * norm.pdf(np.linspace(-10, 10, nX), i - 7.5, 1)

for i in range(N - 15, N):
    temp_filt[i, :] = norm.pdf(
        np.linspace(-5, 5, nT), loc=.45 * i - 10, scale=.8) - .6 * norm.pdf(
            np.linspace(-5, 5, nT), loc=.45 * i - 9, scale=.8)
    spat_filt[i, :] = norm.pdf(
        np.linspace(-15, 15, nX), loc=1.5 * i - 33, scale=.7) - .5 * norm.pdf(
            np.linspace(-15, 15, nX), loc=1.5 * i - 33, scale=1.5)

bias = -3 - 2 * np.random.uniform(size=N)  # Bias
print(bias)
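The sigmoid and bias defined above suggest these filters feed a linear-nonlinear model. The following is only a sketch under that assumption; the white-noise stimulus and the outer-product construction are illustrative choices, not part of the original:

import numpy as np

nTime = 500                                  # hypothetical stimulus length
stim = np.random.randn(nTime, nX)            # white-noise stimulus (time x space)

rates = np.zeros((N, nTime - nT))
for n in range(N):
    rf = np.outer(temp_filt[n], spat_filt[n])        # separable space-time filter
    for t in range(nTime - nT):
        drive = np.sum(rf * stim[t:t + nT, :])       # project the stimulus window
        rates[n, t] = sigmoid(drive + bias[n])       # pointwise nonlinearity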
Esempio n. 53
0
            for iteration in range(0, 250):

                # We need to have observed at least 3 items for the model to be able to predict
                surr_predictions = np.zeros_like(test_index)
                if iteration > 2 and alpha < 1:
                    surr_estimator.train(
                        np.array(observed_X).astype(float),
                        np.array(observed_y))
                    mu, var = surr_estimator.predict(
                        np.array(surr_X.iloc[test_index]).astype(float))
                    mu = mu.reshape(-1)
                    var = var.reshape(-1)
                    sigma = np.sqrt(var)
                    diff = mu - np.max(observed_y)
                    Z = diff / sigma
                    ei = diff * norm.cdf(Z) + sigma * norm.pdf(Z)
                    surr_predictions = ei

                    # surr_predictions = surr_estimator.predict(np.array(surr_X.iloc[test_index]).astype(float))
                # print(iteration, "\t", np.std(surr_predictions), "\t", np.std(meta_predictions))

                m_corr, m_pvalue = kendalltau(meta_predictions, y[test_index])
                s_corr, s_pvalue = kendalltau(surr_predictions, y[test_index])

                if s_corr > m_corr:
                    surpassed = iteration if surpassed is None else surpassed

                # alpha == 0: Only surrogate predictions
                # alpha == 1: Only meta-model predictions
                corrected_iteration = np.maximum(1, iteration - 2)
                # scaled_meta_predictions = MinMaxScaler().fit_transform(meta_predictions.reshape(-1, 1)).reshape(-1)
Esempio n. 54
0
    r_list = np.random.random(N)  # list with N random numbers between 0 and 1
    for i in range(N):
        if r_list[i] >= 0.5:
            part_pos_list[i] += h  # One step to the right
        else:
            part_pos_list[i] -= h  # One step to the left

# fit a normal distribution to part_pos_list: mu is the mean, sigma the standard deviation
mu, sigma = norm.fit(part_pos_list)
print("mu =", mu, "sigma =", sigma)

# pre-plotting
xMax = np.max(np.abs(part_pos_list))  # maximum absolute x position value
xRange = (-xMax * 1.1, xMax * 1.1)  # Range for the plot
xAx = np.linspace(*xRange, 1000)  # list of x values for the normal distribution
p = norm.pdf(xAx, mu, sigma)  # normal distribution

# plotting
savename = "RandomWalkIn1D"
fig, ax = plt.subplots(1, 1, num=savename)
# new axis for p
ax2 = ax.twinx()
# Set ax's patch invisible
ax.patch.set_visible(False)
# move ax in front
ax.set_zorder(ax2.get_zorder() + 1)
# Histogram, bins is given as the number of possible positions for a particle
# distribution=False because True will mess with scaling
hist, bins = np.histogram(part_pos_list,
                          bins=(2 * Ntime + 1),
                          density=True,
Esempio n. 55
0
def EI(mean, std, max_val, tradeoff):
    z = (mean - max_val - tradeoff) / std
    return (mean - max_val - tradeoff) * ndtr(z) + std * norm.pdf(z)
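A hedged usage sketch (the arrays below are hypothetical posterior summaries; `ndtr` is assumed to come from `scipy.special`, as the call above implies):

import numpy as np
from scipy.special import ndtr
from scipy.stats import norm

mean = np.array([0.2, 0.5, 0.9])     # hypothetical posterior means
std = np.array([0.30, 0.10, 0.05])   # hypothetical posterior standard deviations
acq = EI(mean, std, max_val=0.8, tradeoff=0.01)
next_point = int(np.argmax(acq))     # candidate with the largest expected improvement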
Esempio n. 56
0
    #   mean - get_error_conf(sample_size, confidence, std), mean + get_error_conf(sample_size, confidence, std)
    interval = norm.interval(alpha=confidence,
                             loc=mean,
                             scale=std / np.sqrt(sample_size))
    return interval


if __name__ == "__main__":
    """ If the module is called as script, plot the probability density function 
        and the cumulative distribution function.
        Modified from: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
    """
    from scipy.stats import norm
    import matplotlib.pyplot as plt
    import numpy as np

    print('Plotting the Probability Density Function' +
          '\n and the Cumulative Distribution Function')
    x = np.linspace(norm.ppf(0.001), norm.ppf(0.999), 100)
    plt.subplot(1, 2, 1)
    plt.plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6)
    plt.title("Probability Density Function")
    plt.xlabel("norm.ppf(0.001) <= x <= norm.ppf(0.999)")
    plt.ylabel("norm.pdf(x)")
    plt.subplot(1, 2, 2)
    plt.plot(x, norm.cdf(x), 'b-', lw=5, alpha=0.6)
    plt.title("Cumulative Density Function")
    plt.xlabel("norm.ppf(0.001) <= x <= norm.ppf(0.999)")
    plt.ylabel("norm.cdf(x)")
    plt.show()
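A hedged usage sketch of the interval computation at the top of this example (the enclosing function is not shown above, so the call is reproduced inline with hypothetical inputs):

import numpy as np
from scipy.stats import norm

sample_size, confidence, mean, std = 50, 0.95, 10.0, 2.0   # hypothetical inputs
# newer SciPy releases rename this keyword from `alpha` to `confidence`
interval = norm.interval(alpha=confidence, loc=mean, scale=std / np.sqrt(sample_size))
print(interval)   # roughly (9.45, 10.55) for these inputs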
Esempio n. 57
0
def detect_knee(data, window_size=1, s=10):
    """
    Detect the so-called knee in the data.

    The implementation is based on paper [1] and code here (https://github.com/jagandecapri/kneedle).

    @param data: The 2d data to find a knee in.
    @param window_size: The data is smoothed with a Gaussian kernel average smoother; this parameter is the window used for averaging (higher values mean more smoothing, try 3 to begin with).
    @param s: How many "flat" points to require before we consider it a knee.
    @return: The indices of detected knees, or None if no knee is found.
    """

    data_size = len(data)
    data = np.array(data)

    if data_size == 1:
        return None

    # smooth
    smoothed_data = []
    for i in range(data_size):

        if 0 < i - window_size:
            start_index = i - window_size
        else:
            start_index = 0
        if i + window_size > data_size - 1:
            end_index = data_size - 1
        else:
            end_index = i + window_size

        sum_x_weight = 0
        sum_y_weight = 0
        sum_index_weight = 0
        for j in range(start_index, end_index):
            index_weight = norm.pdf(abs(j - i) / window_size, 0, 1)
            sum_index_weight += index_weight
            sum_x_weight += index_weight * data[j][0]
            sum_y_weight += index_weight * data[j][1]

        smoothed_x = sum_x_weight / sum_index_weight
        smoothed_y = sum_y_weight / sum_index_weight

        smoothed_data.append((smoothed_x, smoothed_y))

    smoothed_data = np.array(smoothed_data)

    # normalize
    normalized_data = MinMaxScaler().fit_transform(smoothed_data)

    # difference
    differed_data = [(x, y - x) for x, y in normalized_data]

    # find indices for local maximums
    candidate_indices = []
    for i in range(1, data_size - 1):
        if (differed_data[i - 1][1] < differed_data[i][1]) and (
                differed_data[i][1] > differed_data[i + 1][1]):
            candidate_indices.append(i)

    # threshold
    step = s * (normalized_data[-1][0] - data[0][0]) / (data_size - 1)

    # knees
    knee_indices = []
    for i in range(len(candidate_indices)):
        candidate_index = candidate_indices[i]

        if i + 1 < len(candidate_indices):  # not last second
            end_index = candidate_indices[i + 1]
        else:
            end_index = data_size

        threshold = differed_data[candidate_index][1] - step

        for j in range(candidate_index, end_index):
            if differed_data[j][1] < threshold:
                knee_indices.append(candidate_index)
                break

    if knee_indices != []:
        return knee_indices  #data[knee_indices]
    else:
        return None
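A minimal usage sketch on a synthetic curve (assumes the function's module imports numpy, `scipy.stats.norm` and scikit-learn's `MinMaxScaler`, as its body requires):

import numpy as np

x = np.linspace(0, 10, 100)
y = 1 - np.exp(-x)                 # curve with a clear knee near the origin
data = list(zip(x, y))
knees = detect_knee(data, window_size=3, s=10)
print(knees)                       # list of knee indices, or None if nothing is found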
Esempio n. 58
0
def gaussian_ei(X, model, y_opt=0.0, xi=0.01, return_grad=False):
    """
    Use the expected improvement to calculate the acquisition values.

    The conditional probability `P(y=f(x) | x)` forms a Gaussian with a
    certain mean and standard deviation approximated by the model.

    The EI condition is derived by computing ``E[u(f(x))]``
    where ``u(f(x)) = 0`` if ``f(x) > y_opt`` and ``u(f(x)) = y_opt - f(x)``
    if ``f(x) < y_opt``.

    This solves one of the issues of the PI condition by giving a reward
    proportional to the amount of improvement gained.

    Note that the value returned by this function should be maximized to
    obtain the ``X`` with maximum improvement.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        Values where the acquisition function should be computed.

    * `model` [sklearn estimator that implements predict with ``return_std``]:
        The fit estimator that approximates the function through the
        method ``predict``.
        It should have a ``return_std`` parameter that returns the standard
        deviation.

    * `y_opt` [float, default 0]:
        Previous minimum value which we would like to improve upon.

    * `xi`: [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Useful only when ``method`` is set to "EI"

    * `return_grad`: [boolean, optional]:
        Whether or not to return the grad. Implemented only for the case where
        ``X`` is a single sample.

    Returns
    -------
    * `values`: [array-like, shape=(X.shape[0],)]:
        Acquisition function values computed at X.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        if return_grad:
            mu, std, mu_grad, std_grad = model.predict(X,
                                                       return_std=True,
                                                       return_mean_grad=True,
                                                       return_std_grad=True)

        else:
            mu, std = model.predict(X, return_std=True)

    values = np.zeros_like(mu)
    mask = std > 0
    improve = y_opt - xi - mu[mask]
    scaled = improve / std[mask]
    cdf = norm.cdf(scaled)
    pdf = norm.pdf(scaled)
    exploit = improve * cdf
    explore = std[mask] * pdf
    values[mask] = exploit + explore

    if return_grad:
        if not np.all(mask):
            return values, np.zeros_like(std_grad)

        # Substitute (y_opt - xi - mu) / sigma = t and apply chain rule.
        # improve_grad is the gradient of t wrt x.
        improve_grad = -mu_grad * std - std_grad * improve
        improve_grad /= std**2
        cdf_grad = improve_grad * pdf
        pdf_grad = -improve * cdf_grad
        exploit_grad = -mu_grad * cdf - pdf_grad
        explore_grad = std_grad * pdf + pdf_grad

        grad = exploit_grad + explore_grad
        return values, grad

    return values
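A hedged usage sketch with a scikit-learn Gaussian process as the surrogate; the gradient branch needs a model exposing `return_mean_grad`/`return_std_grad`, so only the default path (`return_grad=False`) is exercised, and the function's module is assumed to import `warnings` and `norm` as its body requires:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

rng = np.random.default_rng(0)
X_obs = rng.uniform(-2, 2, size=(10, 1))
y_obs = np.sin(3 * X_obs).ravel() + 0.1 * rng.standard_normal(10)

gp = GaussianProcessRegressor().fit(X_obs, y_obs)
X_cand = np.linspace(-2, 2, 200).reshape(-1, 1)
ei = gaussian_ei(X_cand, gp, y_opt=y_obs.min(), xi=0.01)
x_next = X_cand[np.argmax(ei)]   # maximize EI to choose the next evaluation point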
Esempio n. 59
0
 b: upper bound of the interval of test values
 n: number of test values
 name: name of the test to be printed by the test program
 c_fn_name: name of the C function being tested
 args: additional arguments of the C function being tested
"""
TestParams = namedtuple('TestParams', 'a b n f name c_fn_name args'.split())

test_params_list = [
    TestParams(-1, 2, 100, uniform.pdf, 'uniform(0, 1) pdf', 'uniform_pdf',
               (0, 1)),
    TestParams(-1, 2, 100, uniform.cdf, 'uniform(0, 1) cdf', 'uniform_cdf',
               (0, 1)),
    TestParams(-10, 10, 1000, norm.pdf, 'gaussian(0, 1) pdf', 'gaussian_pdf',
               (0, 1)),
    TestParams(-10, 20, 1000, lambda x: norm.pdf(x, 10, 2**.5),
               'gaussian(10, 2) pdf', 'gaussian_pdf', (10, 2)),
    TestParams(-10, 10, 1000, norm.cdf, 'gaussian(0, 1) cdf', 'gaussian_cdf',
               (0, 1)),
    TestParams(-10, 20, 1000, lambda x: norm.cdf(x, 10, 2**.5),
               'gaussian(10, 2) cdf', 'gaussian_cdf', (10, 2)),
    TestParams(-1, 10, 1000, lambda x: gamma.pdf(x, 1), 'gamma(1, 1) pdf',
               'gamma_pdf', (1, 1)),
    TestParams(-1, 10, 1000, lambda x: gamma.pdf(x, 1.25, 0, 1),
               'gamma(1.25, 1) pdf', 'gamma_pdf', (1.25, 1)),
    TestParams(-1, 10, 1000, lambda x: gamma.pdf(x, 1.25, 0, 1 / 2),
               'gamma(1.25, 2) pdf', 'gamma_pdf', (1.25, 2)),
    TestParams(-1, 10, 1000, lambda x: gamma.cdf(x, 1), 'gamma(1, 1) cdf',
               'gamma_cdf', (1, 1)),
    TestParams(-1, 10, 1000, lambda x: gamma.cdf(x, 1.25, 0, 1),
               'gamma(1.25, 1) cdf', 'gamma_cdf', (1.25, 1)),
Esempio n. 60
0
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

x = np.linspace(-4, 4, num=100)

fig = plt.figure(figsize=(8, 5))
ax = fig.add_subplot()

ax.plot(x, norm.pdf(x, loc=-1, scale=1), color="magenta")
ax.plot(x, norm.pdf(x, loc=0, scale=1), color=(0.85, 0.64, 0.12))
ax.plot(x, norm.pdf(x, loc=1, scale=1), color="#228B22")

plt.savefig('colours.svg', bbox_inches='tight')
plt.show()