def find_tail_point(w, min_height, ratio):
    """Locate the tail of a waveform from a quick Gaussian approximation.

    The waveform is approximated by a Gaussian (mean, sigma and amplitude
    estimated from moments), and the tail is the first sample where the
    waveform falls below ``amplitude * ratio`` while being at least
    2 sigma past the mean time.
    """
    if np.max(w) < min_height:
        return []
    # Quick fit to the waveform and locate tail position
    ix = np.arange(len(w))
    # Use the central population to estimate mean and sigma
    cnt = w > 0.02 * np.max(w)
    mean = np.sum(ix[cnt] * w[cnt]) / np.sum(w[cnt])
    sigma = (np.sum((ix[cnt] - mean) ** 2 * w[cnt]) / np.sum(w[cnt])) ** 0.5
    # Use the estimated mean and sigma to find the amplitude
    amp = np.sum(w[cnt] * norm.pdf(ix[cnt], mean, sigma)) / np.sum(norm.pdf(ix[cnt], mean, sigma) ** 2)
    # Define the tail as the point where the waveform drops below ratio * amplitude
    tail = np.where(w < ratio * amp)[0]
    tail = tail[(tail > mean + 2 * sigma) & (tail != len(w))]
    if len(tail) > 0:
        return tail[:1]
    else:
        return []
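# Hedged usage sketch (not part of the original source): build a synthetic
# Gaussian pulse and locate its tail with find_tail_point. Assumes numpy and
# scipy.stats.norm are imported as `np` and `norm`, as in the function above.
import numpy as np
from scipy.stats import norm

t = np.arange(200)
pulse = 50.0 * norm.pdf(t, loc=80, scale=10)   # synthetic waveform
tail = find_tail_point(pulse, min_height=0.1, ratio=0.01)
print(tail)  # first sample past ~2 sigma where the pulse drops below ratio * fitted amplitude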
def theta(cflag, s, k, v, r, t):
    d1 = (np.log(s/k) + (r + v*v/2)*t) / (v*np.sqrt(t))
    d2 = d1 - v*np.sqrt(t)
    if cflag:  # call
        # Black-Scholes theta: the second term uses the normal CDF, not the PDF
        return -s*norm.pdf(d1)*v/(2*np.sqrt(t)) - r*k*np.exp(-r*t)*norm.cdf(d2)
    else:  # put
        return -s*norm.pdf(d1)*v/(2*np.sqrt(t)) + r*k*np.exp(-r*t)*norm.cdf(-d2)
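# Hedged usage sketch (parameter values are illustrative, not from the original
# source): theta of an at-the-money call and put, S=K=100, 20% vol, 1% rate,
# one year to expiry. Assumes numpy and scipy.stats.norm are imported as above.
call_theta = theta(True, 100.0, 100.0, 0.2, 0.01, 1.0)
put_theta = theta(False, 100.0, 100.0, 0.2, 0.01, 1.0)
print(call_theta, put_theta)  # per-year theta; divide by 365 for a per-day value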
def dual_gaussian(x, amp1=1.0, mean1=0.0, std1=1.0, amp2=1.0, mean2=0.0, std2=1.0):
    """Sum of two Gaussians.

    Parameters
    ----------
    x : array
        Function argument
    amp1 : float
        Amplitude parameter of the first Gaussian
    mean1 : float
        Mean parameter of the first Gaussian
    std1 : float
        Standard deviation parameter of the first Gaussian
    amp2 : float
        Amplitude parameter of the second Gaussian
    mean2 : float
        Mean parameter of the second Gaussian
    std2 : float
        Standard deviation parameter of the second Gaussian
    """
    from scipy.stats import norm

    if std1 <= 0 or std2 <= 0:
        return np.nan
    return (amp1 * norm.pdf(x, mean1, std1)) + (amp2 * norm.pdf(x, mean2, std2))
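# Hedged example (not part of the original source): fit dual_gaussian to a
# noisy two-peak signal with scipy.optimize.curve_fit. Assumes numpy is
# available as `np`, as used in the function above.
import numpy as np
from scipy.optimize import curve_fit

x = np.linspace(-5, 10, 400)
y = dual_gaussian(x, amp1=2.0, mean1=0.0, std1=0.8, amp2=1.0, mean2=5.0, std2=1.5)
y += np.random.normal(scale=0.01, size=x.size)

p0 = [1.0, -0.5, 1.0, 1.0, 4.0, 1.0]   # rough initial guess: amp, mean, std for each peak
popt, pcov = curve_fit(dual_gaussian, x, y, p0=p0)
print(popt)  # should be close to (2.0, 0.0, 0.8, 1.0, 5.0, 1.5)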
def test_mlexplorer(self): x = np.arange(0,600,1.0) nb_samples = 100 # number of samples in our dataset # true partial spectra S_1 = norm.pdf(x,loc=200.,scale=130.) S_2 = norm.pdf(x,loc=400,scale=70) S_true = np.vstack((S_1,S_2)) #60 samples with random concentrations between 0 and 1 C_ = np.random.rand(nb_samples) C_true = np.vstack((C_,(1-C_))).T # We make some observations with random noise Obs = np.dot(C_true,S_true) + np.random.randn(nb_samples,len(x))*1e-4 # new observations C_new_ = np.random.rand(10) #10 samples with random concentrations between 0 and 1 C_new_true = np.vstack((C_new_,(1-C_new_))).T noise_new = np.random.randn(len(x))*1e-4 Obs_new = np.dot(C_new_true,S_true) + noise_new explo = rp.mlexplorer(Obs) # we just test that it runs for now explo.algorithm = 'NMF' explo.nb_compo = 2 explo.test_size = 0.3 explo.scaler = "MinMax" explo.fit() explo.refit()
def testGaussian2D(self): gauss2d = Gauss2D(0.5/np.pi, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0) self.assertEqual(gauss2d.integral(), 1.0) self.assertEqual(gauss2d.evaluate(0.0, 0.0), 0.5/np.pi) gauss2d.update_params(mu0=-78.9) self.assertEqual(gauss2d.evaluate(-78.9, 0.0), 0.5/np.pi) self.assertGreater(gauss2d.evaluate(-78.9, 0.0), gauss2d.evaluate(-78.9, 1.0)) gauss2d.update_params(offset=1234.5) self.assertEqual(gauss2d.integral(), 1.0) # Test the actual fitting. mu0 = 40.0 mu1 = 60.0 sigma0 = 10.0 sigma1 = 7.0 x = np.arange(100) X0, X1 = np.meshgrid(x, x, indexing="ij") Z = norm.pdf((X0 - mu0) / sigma0) * norm.pdf((X1 - mu1) / sigma1) gauss2d.initial_guess(Z) gauss2d.fit(Z) self.assertAlmostEqual(gauss2d.param_dict["mu0"], mu0) self.assertAlmostEqual(gauss2d.param_dict["mu1"], mu1) self.assertAlmostEqual(gauss2d.param_dict["sigma0"], sigma0) self.assertAlmostEqual(gauss2d.param_dict["sigma1"], sigma1)
def analyze_pedestrian_queue(file_path):
    """Analyze the pedestrians created by the simulation."""
    destinations, speeds = np.loadtxt(file_path, delimiter=',', unpack=True)

    # Show histogram of destination selections
    plt.hist(destinations, bins=len(np.unique(destinations)), density=True)
    plt.xlabel("Selected Destinations (IDs)")
    plt.ylabel("Normalized Frequency (PDF)")
    plt.grid(True)

    # Intended pedestrian speed distribution
    mu = 1.340
    sigma = 0.265
    pdf_range = np.arange(0, 2.5, 0.001)

    # Plot theoretical distribution
    plt.figure()
    plt.plot(pdf_range, norm.pdf(pdf_range, mu, sigma))
    plt.grid(True)
    plt.xlabel("Pedestrian Speeds (m/s)")
    plt.ylabel("PDF")

    # Plot the histogram of speeds with the theoretical distribution overlaid
    plt.figure()
    plt.hist(speeds, bins=int((2*len(speeds))**(1./3)), density=True)
    plt.grid(True)
    plt.xlabel("Pedestrian Speeds (m/s)")
    plt.ylabel("Normalized Frequency (PDF)")
    plt.plot(pdf_range, norm.pdf(pdf_range, mu, sigma), linewidth=2)
    plt.legend(["Theory", "Actual"])

    plt.show()  # Show the plots
def VOIfunc(self, n, pointNew, grad):
    xNew = pointNew
    nTraining = self._GP._numberTraining
    tempN = nTraining + n
    # n = n - 1
    vec = np.zeros(tempN)
    X = self._PointsHist
    for i in range(tempN):
        vec[i] = self._GP.muN(X[i, :], n)
    maxObs = np.max(vec)
    std = np.sqrt(self._GP.varN(xNew, n))
    muNew, gradMu = self._GP.muN(xNew, n, grad=True)
    Z = (muNew - maxObs) / std
    temp1 = (muNew - maxObs) * norm.cdf(Z) + std * norm.pdf(Z)
    if not grad:
        return temp1
    var, gradVar = self._GP.varN(xNew, n, grad=True)
    gradstd = 0.5 * gradVar / std
    gradZ = ((std * gradMu) - (muNew - maxObs) * gradstd) / var
    temp10 = (
        gradMu * norm.cdf(Z)
        + (muNew - maxObs) * norm.pdf(Z) * gradZ
        + norm.pdf(Z) * gradstd
        + std * (norm.pdf(Z) * Z * (-1.0)) * gradZ
    )
    return temp1, temp10
def plot_mcmc_samples(data, samples, true_clusters, x_min= -15, x_max=15, stepsize=0.001): # Plot the data plt.title('Observed Data Points\n{0} total'.format(len(data))) plt.hist(data, 20) plt.savefig('points.pdf') plt.clf() # Plot the mean and bands of size 2 stdevs for the samples. # Also plot the last sample from the MCMC chain. xvals = np.arange(x_min, x_max, stepsize) true_pdf = sum(norm.pdf(xvals, mean, stdev) for mean, stdev in true_clusters) / float(len(true_clusters)) no_bands = np.zeros(len(xvals)) sample_pdfs = np.array([sum([norm.pdf(xvals, mean, stdev) for mean, stdev in sample]) / float(len(sample)) for sample in samples]) sample_means = sample_pdfs.mean(axis=0) sample_bands = sample_pdfs.std(axis=0)*2. last = sample_pdfs[-1] means = np.array([true_pdf, sample_means, last]) bands = np.array([no_bands, sample_bands, no_bands]) names = ['True PDF', 'Bayes\nEstimate\n(+/- 2 stdevs)', 'Last MCMC\nsample'] mcmc_params = '{0} points, {1} iterations, {2} burn-in, {3} thin, {4} samples'.format(NUM_POINTS, ITERATIONS, BURN_IN, THIN, len(samples)) dp_params = 'alpha={0}, mu0={1}, nu0={2}, a0={3}, b0={4}'.format(ALPHA, MU_0, NU_0, A_0, B_0) plot_noisy_means('Dirichlet Process Mixture Results', means, bands, names, xvals=xvals, xlabel='X', ylabel='Probability', subtitle='{0}\n{1}'.format(mcmc_params, dp_params)) # Plot the distribution of clusters in the samples plot_cluster_distribution(samples)
def get_sent_similarity(user_data): scores=[] #=====[ Creates counts for each sentiment score in 21 buckets of width 0.1 from -1 to 1 ]===== for data in user_data: user_score = [0]*21 for tweet in data: score = int(float("%.1f" % tweet['score'])*10+10) user_score[score] += 1 scores.append(user_score) #=====[ Forms normalized probability distributions for each users sentiments ]===== x = np.linspace(-1, 1, 100) mu, std = norm.fit(scores[0]) p = norm.pdf(x, mu, std) mu, std = norm.fit(scores[1]) p2 = norm.pdf(x,mu,std) #=====[ Takes Kullback-Leibler Divergence between probability distributions ]===== similarity = float("%.5f" % scipy.stats.entropy(p,p2)) #=====[ Converts similarity score to a percentage from 10 - 90 to display on compatability spectrum ]===== if similarity < 0.003: return 90 elif similarity > 0.07: return 10 else: return int(10 + ((similarity*100)-1)/6.7*80) return int(similarity)
def dist2weights_gauss(dist, max_r, max_w=1, min_w=1e-3, S=None, rescale=True): """Gaussian distance weighting. Parameters ---------- dist: float or np.ndarray the distances to be transformed into weights. max_r: float maximum radius of the neighbourhood considered. max_w: int (default=1) maximum weight to be considered. min_w: float (default=1e-8) minimum weight to be considered. S: float or None (default=None) the scale magnitude. rescale: boolean (default=True) if re-scale the magnitude. Returns ------- weights: np.ndarray, array_like, shape (num. of retrievable candidates) values of the weight of the neighs inferred from the distances. """ if S is None: S = set_scale_gauss(max_r, max_w, min_w) if rescale: A = max_w/(norm.pdf(0, scale=S)-norm.pdf(max_r, scale=S)) weights = A*norm.pdf(dist, scale=S) else: A = max_w/norm.pdf(0, scale=S) weights = A*norm.pdf(dist, scale=S) return weights
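# set_scale_gauss is called above but not shown in this snippet. A minimal
# sketch of one plausible implementation (an assumption, not the original
# helper): pick the Gaussian scale S so that, before rescaling, the weight
# decays from max_w at distance 0 to min_w at distance max_r, i.e.
# max_w * exp(-max_r**2 / (2 * S**2)) = min_w.
import numpy as np

def set_scale_gauss(max_r, max_w=1, min_w=1e-3):
    """Hypothetical helper: scale S giving weight min_w at distance max_r."""
    return max_r / np.sqrt(2.0 * np.log(max_w / min_w))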
def show_cond(): results = load_same_loc() tt = 8 # Graph the histogram of activity at the center of # of the room from cell 0 acts = [x[0] for x in results] plt.subplot(2,1,1) count, bins, _ = plt.hist(acts,bins=40,normed=Normed) plt.title('Original dist. Pts: %i'%(len(acts),)) xlm = plt.xlim() xs = np.linspace(xlm[0],xlm[1],1000) mn, std = gaussian(acts) plt.plot(xs,norm.pdf(xs,mn,std)) # Graph the histogram of activity at the center of # of the room from cell 1 plt.subplot(2,1,2) nb = [x[1] for x in results] plt.hist(nb,bins=bins,normed=Normed) mn, std = gaussian(nb) plt.plot(xs,norm.pdf(xs,mn,std)) plt.title('Side dist. Pts: %i'%(len(nb),)) plt.xlim(xlm) mx_i = np.argmax(count) plt.figure() plt.subplot(2,1,1) nb = [x[1] for x in results] plt.hist(nb,bins=bins,normed=Normed) plt.title('Side dist. Pts: %i'%(len(nb),)) mn, std = gaussian(nb) plt.plot(xs,norm.pdf(xs,mn,std)) plt.xlim(xlm) plt.subplot(2,1,2) iss = get_iss(mx_i+1, bins, acts) acts_neighbor = [x[1] for x in results[iss]] plt.hist(acts_neighbor,bins=bins,normed=Normed) plt.title('Pts: %i'%(len(acts_neighbor))) nb = acts_neighbor mn, std = gaussian(nb) plt.plot(xs,norm.pdf(xs,mn,std)) plt.xlim(xlm) plt.figure() for delt in range(-tt,tt): iss = get_iss(mx_i+delt, bins, acts) acts_neighbor = [x[1] for x in results[iss]] plt.subplot(2,tt,delt+tt+0) plt.hist(acts_neighbor,bins=bins,normed=Normed) nb = acts_neighbor mn, std = gaussian(nb) plt.plot(xs,norm.pdf(xs,mn,std)) plt.title('Pts: %i'%(len(acts_neighbor))) plt.xlim(xlm) plt.show()
def mcmc(N=1000, k={"t1":100, "t2":100, "t3":5}, x=[], v=[]): chute = {"t1":[10],"t2":[10],"t3":[0.01]} M = chute hiper = {"t1":[0,100],"t2":[0,100],"t3":[0.1,0.1]} #VALORES DOS HIPERPARAMETROS for i in range(N-1): for j in M.keys(): if j == "t1" or j == "t2": M[j].append( np.random.normal(loc = M[j][-1]-k[j]/100, scale = k[j], size = 1) ) lista = [ [ M[l][-1] for l in M.keys()] , [ M[l][-1] if l!=j else M[l][-2] for l in M.keys() ] ] t1 = norm.pdf(M[j][-1], loc = hiper[j][0], scale = hiper[j][1]) * L(x, v, lista[0]) * norm.pdf(M[j][-2], loc = M[j][-1]-k[j]/100, scale = k[j]) t2 = norm.pdf(M[j][-2], loc = hiper[j][0], scale = hiper[j][1]) * L(x, v, lista[1]) * norm.pdf(M[j][-1], loc = M[j][-2]-k[j]/100, scale = k[j]) teste = (t1/t2) else: M[j].append( np.random.gamma(shape = M[j][-1]*k[j], scale = k[j], size = 1) ) lista = [ [ M[l][-1] for l in M.keys()] , [ M[l][-1] if l!=j else M[l][-2] for l in M.keys() ] ] t1 = gamma.pdf(M[j][-1], a = hiper[j][0], scale = hiper[j][1]) * L(x, v, lista[0]) * gamma.pdf(M[j][-2], a = M[j][-1]*k[j], scale = k[j]) t2 = gamma.pdf(M[j][-2], a = hiper[j][0], scale = hiper[j][1]) * L(x, v, lista[1]) * gamma.pdf(M[j][-1], a = M[j][-2]*k[j], scale = k[j]) teste = (t1/t2) if (min(1 , teste) < np.random.uniform(low = 0, high = 1, size = 1) ) or (np.isinf(teste)) or (np.isnan(teste)) : M[j][-1] = M[j][-2] return(M)
def generate_random_traffic(size=200, date_start="2016/01/01", output_path="simu.csv"):
    x = np.linspace(1, size, size)
    index = pd.date_range(date_start, periods=size, freq="1H")
    rs = np.random.RandomState()
    g = pd.Series(0, index=index)
    # gaussian mixture
    for i in rs.choice(x, size=size // 3, replace=False):
        spread = rs.uniform(1, 100)
        multiplicator = 5 * spread
        g += pd.Series(norm.pdf(x, i, spread) * multiplicator, index=index)
    # gaussian 24h seasonality
    seasonx = x = np.linspace(1, 24, 24)
    season_index = pd.date_range('01/01/2016', periods=24, freq="1H")
    season = pd.Series(0, index=season_index)
    for i in rs.choice(x[6:-6], size=4, replace=True):
        spread = rs.uniform(1, 20)
        multiplicator = g.mean()
        s = pd.Series(norm.pdf(seasonx, i, spread) * multiplicator, index=season_index)
        season += s
    for i in range(0, len(g) // len(season)):
        g = g.add(season.shift(24 * i, freq="1H") * 3, fill_value=0)
    g = g.add(pd.Series(0, index=index).cumsum())
    g.to_csv(output_path)
def simulate_stupidDPM(iter_num, M):
    # Generate mixture sample
    N = 1000
    mu = [0.0, 10.0, 3.0]
    components = np.random.choice(range(3), size=N, replace=True, p=[0.3, 0.5, 0.2])
    samples = [norm.rvs(size=1, loc=mu[components[i]], scale=1)[0] for i in range(N)]

    ## Sample G from DP(M, G0)
    v = beta.rvs(a=1.0, b=M, size=N)
    prob_vector = np.append(np.array(v[0]), v[1:] * np.cumprod(1.0 - v[:-1]))
    thetas = norm.rvs(size=N, loc=1.0, scale=1.0)

    ### Initialize thetas
    thetas = np.random.choice(thetas, size=N, replace=True, p=prob_vector)

    ### Start MCMC chain
    for i in range(iter_num):
        for j in range(N):
            theta_temp = np.append(thetas[:j], thetas[j+1:])
            p = np.append(norm.pdf(samples[j], loc=theta_temp, scale=1.0),
                          M * norm.pdf(samples[j], loc=1.0, scale=np.sqrt(2.0)))
            p = p / sum(p)
            temp = np.random.choice(np.append(theta_temp, N), size=1, replace=True, p=p)
            if (temp == N):
                thetas[j] = norm.rvs(size=1, loc=0.5 * (samples[j] + 1), scale=np.sqrt(0.5))
            else:
                thetas[j] = temp
        print(thetas)
    return {"thetas": thetas, "y": samples}
def __init__(self, image=None, mesh_factor=14, density_distribution=None): super(CoordinateSolver, self).__init__() self.image = image self.height, self.width, _ = self.image.shape self.mesh_factor = mesh_factor self.height /= self.mesh_factor self.width /= self.mesh_factor self.image = self.image[:self.mesh_factor*self.height, :self.mesh_factor*self.width] if type(density_distribution) == np.ndarray: restricted_density = density_distribution[:self.mesh_factor*self.height, :self.mesh_factor*self.width] target_areas = restricted_density target_areas = target_areas[:-1, :-1] else: target_areas = np.indices((self.width-1, self.height-1)).T.astype('float32') target_areas = norm.pdf(target_areas[:, :, 0], self.width/2, self.width/5)\ *norm.pdf(target_areas[:, :, 1], self.height/2, self.height/5) target_areas /= sum(sum(target_areas)) normalisation_factor = (self.height-1)*(self.width-1) target_areas_normalised = target_areas*normalisation_factor self.padded_targets = np.zeros([self.height+1, self.width+1]) self.padded_targets[1:-1, 1:-1] = target_areas_normalised self.coordinates = np.indices((self.width, self.height)).T.astype('float32') self.total_error = (self.height-1)*(self.width-1) self.min_coords = self.coordinates.copy() self.areas = calculate_areas(self.coordinates) self.errors = np.zeros(self.padded_targets.shape) self.x_weights = np.ones([self.height*self.width, self.height + 1, self.width + 1]) self.y_weights = np.ones([self.height*self.width, self.height + 1, self.width + 1]) self.make_weights()
def get_beam_loc(ra,dec,utc,beam_a,beam_b,sn_a,sn_b): sn_a = float(sn_a) sn_b = float(sn_b) boresight = ephem.Equatorial(ra,dec) ra_a,dec_a,info = coords.radec_of_offset_fanbeam(boresight.ra,boresight.dec,beam_a,utc) ns_a,ew_a = info['fanbeam_nsew'] lst = info['mol_curr'].sidereal_time() ra_b,dec_b,info = coords.radec_of_offset_fanbeam(boresight.ra,boresight.dec,beam_b,utc) ns_b,ew_b =info['fanbeam_nsew'] ns = np.linspace(np.radians(-3.0),np.radians(3.0),size) ew = np.linspace(np.radians(-100/3600.0),np.radians(100/3600.0),size) ns,ew = np.meshgrid(ns,ew) beam_a_map = fanbeam_model(ns_a,ew_a,ns+ns_a,ew+ew_a) beam_b_map = fanbeam_model(ns_b,ew_b,ns+ns_a,ew+ew_a) ratio = sn_a/sn_b ratio_sigma = ratio * np.sqrt(1/sn_a**2 + 1/sn_b**2) ratio_map = beam_a_map/beam_b_map ns_fwhm = np.radians(2.0) prob_map = norm.pdf(ratio_map,ratio,ratio_sigma) * norm.pdf(ns,0.0,ns_fwhm/2.355) prob_map/=prob_map.max() ns = ns+ns_a ew = ew+ew_a best_ns = ns[(prob_map.max(axis=1).argmax(),prob_map.max(axis=0).argmax())] best_ew = ew[(prob_map.max(axis=1).argmax(),prob_map.max(axis=0).argmax())] ha,dec = coords.nsew_to_hadec(best_ns,best_ew) ra,dec = coords.radec_to_J2000(lst-ha,dec,utc) return ra,dec,ns,ew,prob_map,lst
def camera_waveforms():
    camera = CameraGeometry.from_name("CHEC")
    n_pixels = camera.n_pixels
    n_samples = 96
    mid = n_samples // 2
    pulse_sigma = 6
    r_hi = np.random.RandomState(1)
    r_lo = np.random.RandomState(2)

    x = np.arange(n_samples)

    # Randomize times
    t_pulse_hi = r_hi.uniform(mid - 10, mid + 10, n_pixels)[:, np.newaxis]
    t_pulse_lo = r_lo.uniform(mid + 10, mid + 20, n_pixels)[:, np.newaxis]

    # Create pulses
    y_hi = norm.pdf(x, t_pulse_hi, pulse_sigma)
    y_lo = norm.pdf(x, t_pulse_lo, pulse_sigma)

    # Randomize amplitudes
    y_hi *= r_hi.uniform(100, 1000, n_pixels)[:, np.newaxis]
    y_lo *= r_lo.uniform(100, 1000, n_pixels)[:, np.newaxis]

    y = np.stack([y_hi, y_lo])

    return y, camera
def model_mixture(separation, prevalence_of_1st):
    x_arr = np.linspace(-7, 7, 100)
    plt.title('%s std delta - %s%% - %s%%' % (separation, prevalence_of_1st*100, (1-prevalence_of_1st)*100))
    plt.plot(x_arr, prevalence_of_1st*norm.pdf(x_arr, loc=-separation/2.), 'r')
    plt.plot(x_arr, (1-prevalence_of_1st)*norm.pdf(x_arr, loc=separation/2.), 'b')
    plt.plot(x_arr, prevalence_of_1st*norm.pdf(x_arr, loc=-separation/2.)
             + (1-prevalence_of_1st)*norm.pdf(x_arr, loc=separation/2.), 'k')
def predictive_recursion_fdr(z, sweeporder, grid_x, theta_guess, mu0 = 0., sig0 = 1.0, nullprob = 1.0, decay = -0.67): gridsize = grid_x.shape[0] theta_subdens = deepcopy(theta_guess) pi0 = nullprob joint1 = np.zeros(gridsize) ftheta1 = np.zeros(gridsize) # Begin sweep through the data for i, k in enumerate(sweeporder): cc = (3. + i)**decay joint1 = norm.pdf(grid_x, loc=z[k] - mu0, scale=sig0) * theta_subdens m0 = pi0 * norm.pdf(z[k] - mu0, 0., sig0) m1 = trapezoid(grid_x, joint1) mmix = m0 + m1 pi0 = (1. - cc) * pi0 + cc * (m0 / mmix) ftheta1 = joint1 / mmix theta_subdens = (1. - cc) * theta_subdens + cc * ftheta1 # Now calculate marginal distribution along the grid points y_mix = np.zeros(gridsize) y_signal = np.zeros(gridsize) for i, x in enumerate(grid_x): joint1 = norm.pdf(grid_x, x - mu0, sig0) * theta_subdens m0 = pi0 * norm.pdf(x, mu0, sig0) m1 = trapezoid(grid_x, joint1) y_mix[i] = m0 + m1; y_signal[i] = m1 / (1. - pi0) return {'grid_x': grid_x, 'sweeporder': sweeporder, 'theta_subdens': theta_subdens, 'pi0': pi0, 'y_mix': y_mix, 'y_signal': y_signal}
def forward_scaled(self, data):
    '''
    Alpha is the joint probability of observing all data and z
    alpha(z[n]) = p(x1...xn, z[n])
    Use scale parameter to prevent underflow
    '''
    # data = data_raw[1]
    length = data.shape[0]

    # Convert data to standard deviations and find probabilities
    data_norm = (data[:, None] - emission_means) / np.sqrt(emission_variances)
    emission_pdf = norm.pdf(data_norm).T
    emission_pdf /= emission_pdf.sum(0)

    alpha = np.zeros([n_states, length], float)
    scale = np.zeros(length, float)

    # Initialize alpha
    # datum_norm = (data[0] - emission_means) / np.sqrt(emission_variances)
    alpha[:, 0] = prior * norm.pdf(data_norm[0])
    scale[0] = 1. / alpha[:, 0].sum()
    alpha[:, 0] *= scale[0]

    # Recursively update alpha
    # for i, d in enumerate(data[1:]):
    for t in range(1, length):
        for k in range(n_states):
            # alpha[k] = np.sum( p(X|Z) * p(Z'|Z) * alpha[k])
            alpha[k, t] = np.sum(alpha[:, t-1] * transition_matrix[:, k]) * emission_pdf[k, t]
        scale[t] = 1. / alpha[:, t].sum()
        alpha[:, t] *= scale[t]
def starloglik(para):
    res = para[0] * norm.pdf(dtemp[:, 17] + para[1], scale=1) + (1 - para[0]) * norm.pdf(
        dtemp[:, 17], loc=para[2], scale=para[3]
    )
    res[res == 0] = 10 ** (-200)
    res = -np.sum(np.log(res))
    return res
def BlackScholes():
    data = {}
    S = float(request.args.get('price'))
    K = float(request.args.get('strike'))
    T = float(request.args.get('time'))
    R = float(request.args.get('rate'))
    V = float(request.args.get('vol'))
    d1 = (log(float(S)/K) + (R + V*V/2.)*T) / (V*sqrt(T))
    d2 = d1 - V*sqrt(T)
    data['cPrice'] = S*norm.cdf(d1) - K*exp(-R*T)*norm.cdf(d2)
    data['pPrice'] = K*exp(-R*T) - S + data['cPrice']
    data['cDelta'] = norm.cdf(d1)
    data['cGamma'] = norm.pdf(d1)/(S*V*sqrt(T))
    data['cTheta'] = (-(S*V*norm.pdf(d1))/(2*sqrt(T)) - R*K*exp(-R*T)*norm.cdf(d2))/365
    data['cVega'] = S*sqrt(T)*norm.pdf(d1)/100
    data['cRho'] = K*T*exp(-R*T)*norm.cdf(d2)/100
    data['pDelta'] = data['cDelta'] - 1
    data['pGamma'] = data['cGamma']
    data['pTheta'] = (-(S*V*norm.pdf(d1))/(2*sqrt(T)) + R*K*exp(-R*T)*norm.cdf(-d2))/365
    data['pVega'] = data['cVega']
    data['pRho'] = -K*T*exp(-R*T)*norm.cdf(-d2)/100
    return json.dumps(data)
def obj_func1(params, price_history, vol, price_durations, y_bar): """ Objective function for optimization """ num_trades = len(price_history) - 1 P_last = price_history[:-1] pdelta = np.array(price_history[1:]) - P_last K = len(price_durations) + 1 informed_prices = params[-K:] P_informed = abmtools.create_price_vector(informed_prices, price_durations, num_trades) x = params[:-K] Sigma_0 = x[4] lfunc1 = lambda x: likelihood1(pdelta, vol, x[0], x[1], x[2], x[3], P_informed, P_last, Sigma_0, y_bar) \ + np.log(norm.pdf(x[0], 0.5, 0.1)) \ + np.log(norm.pdf(x[1], 5e-2, 1e-1)) \ + np.log(norm.pdf(x[2], 100., 15.)) \ + np.log(norm.pdf(x[3], 0.02**2, 0.01)) \ + np.log(norm.pdf(x[4], 10, 5)) return -lfunc1(x)
def log_posterior_1(params0, price_history, vol, price_durations, y_bar): """ Threshold function for metropolis hastings. It is the the log of posterior distribution """ num_trades = len(price_history) - 1 P_last = price_history[:-1] pdelta = np.array(price_history[1:]) - P_last K = len(price_durations) + 1 informed_prices = params0[-K:] P_informed = abmtools.create_price_vector(informed_prices, price_durations, num_trades) x = params0[:-K] Sigma_0 = x[4] lfunc1 = lambda x: likelihood1(pdelta, vol, x[0], x[1], x[2], x[3], P_informed, P_last, Sigma_0, y_bar) \ + np.log(norm.pdf(x[0], 0.3, 0.1)) \ + np.log(norm.pdf(x[1], 5e-2, 1e-1)) \ + np.log(norm.pdf(x[2], 80., 15.)) \ + np.log(norm.pdf(x[3], 0.02**2, 0.01)) \ + np.log(norm.pdf(x[4], 10, 5)) return lfunc1(x)
def lnprob(self, theta): lp = 0 # Covariance amplitude lp += self.ln_prior.lnprob(theta[0]) # Lengthscales lp += self.tophat.lnprob(theta[1:self.n_ls + 1]) # Prior for the Bayesian regression kernel pos = (self.n_ls + 1) end = (self.n_ls + self.n_lr + 1) lp += -np.sum((theta[pos:end]) ** 2 / 10.) # Env Noise #lp += self.ln_prior_env_noise.lnprob(theta[end]) #lp += self.ln_prior_env_noise.lnprob(theta[end + 1]) # alpha lp += norm.pdf(theta[end], loc=-7, scale=1) # beta lp += norm.pdf(theta[end + 1], loc=0.5, scale=1) # Noise lp += self.horseshoe.lnprob(theta[-1]) return lp
def windowScores3(seq, length): diffs = seq[1:] - seq[:-1] absDiffs = np.abs(diffs) diffs_2 = diffs[1:] - diffs[:-1] absDiffs_2 = np.abs(diffs_2) # variance_diff = np.var(diffs) # expectedDiff = np.mean(absDiffs) expectedDiff = np.std(diffs) expectedDiffs_2 = absDiffs[:-1] + expectedDiff scores = np.zeros(seq.shape) actualDiffs = diffs[:-1] actualDiffs_2 = absDiffs_2 # want (actual diff / expected diff) * (expected diff_2 / actual diff_2) firstProbs = norm.pdf(actualDiffs / expectedDiff) secondProbs = norm.pdf((actualDiffs_2 - expectedDiffs_2) / expectedDiff) # scores[1:-1] = (actualDiffs / expectedDiff) * (expectedDiffs_2 / actualDiffs_2) scores[1:-1] = firstProbs * secondProbs # scores[1:-1] = firstProbs # scores[1:-1] = secondProbs print describe(scores, 'scores') return scores
def theta(self, S, t, vol, r): if t > self.T: return np.zeros(len(S)) if self.op_type == 'c': return np.subtract( np.true_divide( np.multiply(-vol, np.multiply(S, norm.pdf(self.d1(S, t, vol, r)))), np.multiply(2, np.power( np.subtract(self.T, t), .5))), np.multiply(r, np.multiply(self.K, np.multiply( np.exp( np.multiply(-r, np.subtract(self.T, t))), norm.cdf(self.d2(S, t, vol, r)))))) else: return np.add( np.true_divide( np.multiply(-vol, np.multiply(S, norm.pdf(self.d1(S, t, vol, r)))), np.multiply(2, np.power( np.subtract(self.T, t), .5))), np.multiply(r, np.multiply(self.K, np.multiply( np.exp( np.multiply(-r, np.subtract(self.T, t))), norm.cdf( np.multiply(-1, self.d2(S, t, vol, r)))))))
def plotEstimateVsN():
    """
    Estimate the mean of a Gaussian distribution, plotting the resulting
    distributions as the number of data points increases. The true variance
    is known in advance.
    """
    # Generate points from a Gaussian distribution
    mu = -0.8   # Mean
    var = 0.1   # Variance

    # Use a mean of 0 for the prior, and the true variance for both prior and likelihoods
    data = np.random.normal(mu, var, 10)

    # Plot the prior distribution with mean 0 and true variance
    x = np.linspace(-1, 1, 100)
    plt.plot(x, norm.pdf(x, 0, np.sqrt(var)), label="N = 0")

    # Plot the distribution as N gets larger
    for i in [1, 2, 10]:
        # Estimate the mean and variance from i data points
        mu, v = estimateBayes(data[:i], 0, var, var)
        # Plot the normal distribution curve
        plt.plot(x, norm.pdf(x, mu, np.sqrt(v)), label="N = {0}".format(i))

    plt.legend()
    plt.show()
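# estimateBayes is called above but not defined in this snippet. A hedged
# sketch of the standard conjugate update it appears to perform (Gaussian
# prior on the mean, known likelihood variance); the signature is inferred
# from the call estimateBayes(data[:i], 0, var, var) above, and `np` is numpy.
def estimateBayes(data, prior_mean, prior_var, likelihood_var):
    """Posterior mean and variance of a Gaussian mean with known variance."""
    n = len(data)
    post_var = 1.0 / (1.0 / prior_var + n / likelihood_var)
    post_mean = post_var * (prior_mean / prior_var + np.sum(data) / likelihood_var)
    return post_mean, post_var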
def P_number_true_obs_fast(args):
    E_true_links = 0
    p_at_least_one_bp_at_given_position = 1 - P_breakpoints_in_interval(1, args.bp_ratio, 0)
    k = 2 * args.readlen / float(args.cov)
    for i in range((args.insertion_size + args.readlen - args.soft) + 1, args.mean + 4*args.stddev):
        # When an internal breakpoint occurs within mean + 4*sigma
        E_true_links += 2*(1/k) * (v(i, args.insertion_size, args.readlen, args.soft) - 1) * norm.pdf(i, args.mean, args.stddev)*poisson.pmf(0, args.bp_ratio*i)*p_at_least_one_bp_at_given_position**2
        # when no breakpoint occurs on one side
        E_true_links += 2*(1/k) * 1 * norm.pdf(i, args.mean, args.stddev)*poisson.pmf(0, args.bp_ratio*i)*p_at_least_one_bp_at_given_position
        # when no breakpoint occurs on both sides
        E_true_links += (1/k) * 1 * norm.pdf(i, args.mean, args.stddev)*poisson.pmf(0, args.bp_ratio*i)
        #print v(i,args.insertion_size, args.readlen, args.soft)

    # when no breakpoint occurs on one side
    i = args.mean + 4*args.stddev
    E_true_links += 2*(1/k)*v(i, args.insertion_size, args.readlen, args.soft)*norm.pdf(i, args.mean, args.stddev)*poisson.pmf(0, args.bp_ratio*i)*p_at_least_one_bp_at_given_position

    # when no breakpoint occurs on both sides
    i = args.mean + 4*args.stddev
    print(v(i, args.insertion_size, args.readlen, args.soft))
    print(1/k)
    E_true_links += (1/k)*v(i, args.insertion_size, args.readlen, args.soft)*norm.pdf(i, args.mean, args.stddev)*poisson.pmf(0, args.bp_ratio*i)
    return E_true_links
def naive_bayes(data, classifier, sample):
    from scipy.stats import norm
    idx_male = array([i for i in range(len(classifier)) if classifier[i] == 0])
    idx_female = array([i for i in range(len(classifier)) if classifier[i] == 1])
    mean_male = mean(data[idx_male, :], 0)
    std_male = std(data[idx_male, :], 0)
    mean_female = mean(data[idx_female, :], 0)
    std_female = std(data[idx_female, :], 0)

    probs_female = []
    for i in range(len(mean_female)):
        probs_female.append(norm.pdf(sample[i], mean_female[i], std_female[i]))

    probs_male = []
    for i in range(len(mean_male)):
        probs_male.append(norm.pdf(sample[i], mean_male[i], std_male[i]))

    p_male = cumprod(probs_male)[-1] * 0.5
    p_female = cumprod(probs_female)[-1] * 0.5

    if p_male > p_female:
        print('The person is MALE', 'says: Naive Bayes')
        return 0
    else:
        print('The person is FEMALE', 'says: Naive Bayes')
        return 1
def tsmaker(m, s, j):
    t = np.arange(0.0, 1.0, 0.01)
    v = norm.pdf(t, m, s) + j * np.random.randn(100)
    return ts.TimeSeries(v, t)
def gfit(inp, sec_name): #Gaussian fit at a given energy sum_edep = inp[sec_name] nbins = 60 plt.style.use("dark_background") col = "lime" #col = "black" fig = plt.figure() ax = fig.add_subplot(1, 1, 1) set_axes_color(ax, col) #data plot hx = plt.hist(sum_edep, bins=nbins, color="lime", density=True, label="edep") #bin centers for the fit centers = (0.5 * (hx[1][1:] + hx[1][:-1])) #pass1, fit over the full range fit_data = DataFrame({"E": centers, "density": hx[0]}) pars, cov = curve_fit(lambda x, mu, sig: norm.pdf(x, loc=mu, scale=sig), fit_data["E"], fit_data["density"]) #print "pass1:", pars[0], pars[1] #pass2, fit in +/- 2*sigma range fitran = [pars[0] - 2. * pars[1], pars[0] + 2. * pars[1]] # fit range at 2*sigma fit_data = fit_data[fit_data["E"].between( fitran[0], fitran[1], inclusive=False)] # select the data to the range pars, cov = curve_fit(lambda x, mu, sig: norm.pdf(x, loc=mu, scale=sig), fit_data["E"], fit_data["density"]) #print "pass2:", pars[0], pars[1] #fit function x = np.linspace(fitran[0], fitran[1], 300) y = norm.pdf(x, pars[0], pars[1]) plt.plot(x, y, "k-", label="norm", color="red") ax.set_xlabel("Calorimeter signal (GeV)") ax.set_ylabel("Counts / {0:.3f} GeV".format( (plt.xlim()[1] - plt.xlim()[0]) / nbins)) #ax.set_title(r"$\text{"+infile+"}$") #print infile set_grid(plt, col) #mean_str = "{0:.4f} \pm {1:.4f}".format(pars[0], np.sqrt(cov[0,0])) #sigma_str = "{0:.4f} \pm {1:.4f}".format(pars[1], np.sqrt(cov[1,1])) #res = pars[1]/pars[0] #res_str = "{0:.4f}".format(res) #fit_param = r"\begin{align*}\mu &= " + mean_str + r"\\ \sigma &= " + sigma_str + r"\\" #fit_param += r"\sigma/\mu &= " + res_str + r"\end{align*}" #plot legend #leg_items = [Line2D([0], [0], lw=2, color="red"), Line2D([0], [0], lw=0)] leg_items = [Line2D([0], [0], lw=0)] #plt.rc("text", usetex = True) #plt.rc('text.latex', preamble='\usepackage{amsmath}') #ax.legend(leg_items, ["Gaussian fit", fit_param]) ax.legend(leg_items, [ "{0:.4f}, {1:.4f}, {2:.4f}".format(pars[0], pars[1], pars[1] / pars[0]) ]) #output log #out = open("out.txt", "w") #out.write( "{0:.4f} +/- {1:.4f} | ".format(pars[0], np.sqrt(cov[0,0])) ) #out.write( "{0:.4f} +/- {1:.4f} | ".format(pars[1], np.sqrt(cov[1,1])) ) #out.write( "{0:.4f}".format(pars[1]/pars[0]) ) #out.close() fig.savefig("01fig.pdf", bbox_inches="tight") plt.close() return pars[0], pars[1]
def likelihood(x, vol):
    sd = np.exp(vol)
    prob = 0
    for i in norm.rvs(loc=mean_mu, scale=mean_sd, size=100):
        prob = prob + norm.pdf(x, i, sd) / 100
    return prob
def pOfXpdf(x, feature):
    sigma = standardDeviation(feature)
    mean = sampleMean(feature)
    prob = norm.pdf(x, loc=mean, scale=sigma)
    return prob
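# standardDeviation and sampleMean are used above but not shown here; minimal
# assumed stand-ins (names inferred from the calls above) built on numpy.
import numpy as np

def sampleMean(feature):
    return np.mean(feature)

def standardDeviation(feature):
    return np.std(feature, ddof=1)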
summ = sum(diff) # print ("SUM - ",summ) summ /= (n * h * 1.0) y.append(summ) # print ("Prazen " - y) return y #Q-7-a # getting alphas X = np.arange(-5, 10, 0.1) alpha = X pr_dis = get_data() # True distribution Y0 = 0.25 * (norm.pdf(alpha, 0, 1) + norm.pdf(alpha, 3, 1) + norm.pdf(alpha, 6, 1) + norm.pdf(alpha, 9, 1)) # get kernel density estimator Y = get_prazen(alpha, 0.1, pr_dis) Y2 = get_prazen(alpha, 1, pr_dis) Y3 = get_prazen(alpha, 7, pr_dis) # plotting the graphs plt.figure('Kernel Density Estimation', figsize=(15, 10)) plt.plot(X, Y0, label='True pdf') plt.plot(X, Y, label='h = 0.1') plt.plot(X, Y2, label='h = 1') plt.plot(X, Y3, label='h = 7') plt.xlabel('x', fontsize=18) plt.ylabel('Pr[X<=x]', fontsize=18)
def bayesian_calculation_update(self, sensor_inputs, time_frame, ITER): error_val_list = [] mse_list = [] self.create_error_lists(time_frame) err_names = [] probs = [] errors = [] error_names = [] self.search_helper({}, {}, errors, error_names) # need to make sure all the lists do not have conflicts errors, error_names = self.check_conflicts(errors, error_names) for i in range(len(errors)): err_name = tuple( set([ item for sublist in error_names[i].values() for item in sublist ])) err_val = errors[i] full_err_name = error_names[i] if self.fft is True: for sensor in self.sensor_list: err_val[sensor] = scipy.ifft(err_val[sensor]) for key, value in self.sirens.items(): if full_err_name[key] in value.keys(): data_size = self.stored_data[key][ full_err_name[key]][1].shape[0] val = self.error_calculation(error_list=full_err_name[key], sensor=key, x=time_frame) err_val[key] = (1 - 1 / np.log(data_size)) * val + ( 1 / np.log(data_size)) * err_val[key] #if err_name == ('Error2',): #plt.clf() #plt.plot(time_frame, err_val[key], color= "red", label = "Estimated Line") #plt.plot(time_frame, sensor_inputs[key], color = "green", label = "Actual Line") #plt.xlabel("Timestamp") #plt.ylabel("Line Val") #plt.title(str(ITER)) #plt.legend() #plt.grid() #plt.show() error_val_list.append(err_val) prob = 0.0 mses = {} for j in self.sensor_list: for i in range(len(sensor_inputs[j])): gaussian_err = norm.pdf(sensor_inputs[j][i], loc=err_val[j][i], scale=self.sigmas[j]) prob += np.log(gaussian_err) mse = ((sensor_inputs[j] - err_val[j])**2).mean() mses[j] = mse err_names.append(err_name) probs.append(prob) mse_list.append(mses) ind, error, pro, mse = self.find_most_probable_error( err_names, probs, mse_list) return error, pro, mse, error_val_list[ind]
def energy_landscape_example(): fig, [ax] = panel(1, 1, l_p=0, r_p=0, b_p=0, t_p=0, dpi=100) rectangle(-1000, 1000, -1000, 1000, fc='g', ec='m', lw=4) plt.axis('off') xlim([-1000, 1000]) ylim([-1000, 1000]) from scipy.stats import norm np.random.seed(584111) x = np.arange(-1000, 1000, 1) y = 50 * np.sin(0.02 * x) + 50 * np.sin(0.05 * x) - 300 * np.sin( 0.007 * x) + 500 for i in range(1, 11): y -= np.random.random() * 2000 * norm.pdf((-1000 + 300 * i + x) / 20) y[0], y[-1] = -1000, -1000 ax.fill(x, y, c='m', alpha=0.2) ax.plot(x, y, c='m', alpha=0.5) pos = [-670] for i in pos: xn = np.argmax(i < x) ax.arrow(x[xn] + 30 + 100, y[xn] + 30, -50, 0, width=15, fc='grey', ec='grey') ax.arrow(x[xn] - 30 - 100, y[xn] + 30, 50, 0, width=15, fc='grey', ec='grey') ellipse([x[xn], y[xn] + 30], 20, alpha=1, fc='b') pos = [320] for i in pos: xn = np.argmax(i < x) ax.arrow(x[xn] + 30, y[xn] + 30, 50, 0, width=15, fc='grey', ec='grey') ax.arrow(x[xn] - 30, y[xn] + 30, -50, 0, width=15, fc='grey', ec='grey') ellipse([x[xn], y[xn] + 30], 20, alpha=1, fc='b') text(400, y[1400] - 100, r'\textbf{Global minima}', ha='center') text(250 + 100, y[1250] - 300, r'\textbf{Local minima}', ha='right') ax.arrow( 225, y[1225] - 200, 0, 130, width=15, fc='k', ) arrow( pos=[[-300, y[1250] - 300], [x[330], y[330]]], curve=-0.5, )
sample_std = np.std(x_sample, ddof=1) #------------------------------------------------------------ # Plot the sampled data fig, ax = plt.subplots(figsize=(5, 3.75)) ax.hist(x_sample, 20, histtype='stepfilled', normed=True, fc='#CCCCCC') x = np.linspace(-2.1, 4.1, 1000) factor1 = ratio_in / (1. + ratio_in) factor2 = 1. / (1. + ratio_in) ax.plot(x, gm.pdf(x), '-k', label='true distribution') ax.plot(x, gm.pdf_individual(x), ':k') ax.plot(x, norm.pdf(x, sample_mu, sample_std), '--k', label='best fit normal') ax.legend(loc=1) ax.set_xlim(-2.1, 4.1) ax.set_xlabel('$x$') ax.set_ylabel('$p(x)$') ax.set_title('Input pdf and sampled data') ax.text(0.95, 0.80, ('$\mu_1 = 0;\ \sigma_1=0.3$\n' '$\mu_2=1;\ \sigma_2=1.0$\n' '$\mathrm{ratio}=1.5$'), transform=ax.transAxes, ha='right', va='top')
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title plt.close('all') plt.figure(1, figsize=(15, 7)) x = [(i + 1) * 0.1 for i in range(9)] y = [norm.ppf(e) for e in x] plt.subplot(121) plt.plot(x, y, '-*b') plt.ylabel('percentile', labelpad=18) plt.xlabel(r'$\alpha$', labelpad=18) # plot our distribution xx = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100) plt.figure(1) plt.subplot(122) plt.plot(xx, norm.pdf(xx), 'r-', lw=2, alpha=0.6) plt.ylim([0, 0.5]) for e in y: print(norm.pdf(e)) plt.axvline(e, 0, norm.pdf(e) / 0.5, color='black', alpha=0.5) plt.ylabel('$f(x)$', labelpad=18) plt.xlabel('$x$', labelpad=18) plt.savefig('percentile_normal.png', bbox_inches='tight', dpi=300)
amp = np.max(px_spec_avg) new_dict[pixel] = amp # try baseline subtraction (from Brad's Renishaw-Reniawesome) base = baseline(px_spec[:,1], 4) plt.plot(base) base_sub = px_spec[:,1]-base plt.plot(base_sub) # gaussian fit try 1 popt, _ = optimize.curve_fit(gaussian, px_spec[:,0], px_spec[:,1]) plt.plot(px_spec[:,0], px_spec[:,1]) plt.plot(px_spec[:,0], gaussian(px_spec[:,0], *popt)) # gaussian fit try 2 mean,std=norm.fit(px_spec[:,1]) px_fit = norm.pdf(px_spec[:,0], mean, std) plt.plot(px_spec[:,0], px_spec[:,1]) plt.plot(px_spec[:,0], px_fit) # gaussian fit try 3 fitter = modeling.fitting.LevMarLSQFitter() model = modeling.models.Gaussian1D() # depending on the data you need to give some initial values fitted_model = fitter(model, px_spec[:,0], px_spec[:,1]) plt.plot(px_spec[:,0], px_spec[:,1]) plt.plot(px_spec[:,0], fitted_model(px_spec[:,0])) #pixel_spectrum_arr_5avg = moving_average(pixel_spectrum_arr[:,1], 5) # copy dictionary key # set value of copy dictionary key to max amplitude # store this key-value pair in new dictionary
def get_emission(self, n, state):
    X = self.X
    phi = self.phi
    prior = norm.pdf(X[n], phi[state][0], phi[state][1])
    return prior
import os

figdir = os.path.join(os.environ["PYPROBML"], "figures")


def save_fig(fname):
    plt.savefig(os.path.join(figdir, fname))


from scipy.stats import t, laplace, norm

a = np.random.randn(30)
outliers = np.array([8, 8.75, 9.5])
plt.hist(a, 7, weights=[1 / 30] * 30, rwidth=0.8)

# fit without outliers
x = np.linspace(-5, 10, 500)

loc, scale = norm.fit(a)
n = norm.pdf(x, loc=loc, scale=scale)

loc, scale = laplace.fit(a)
l = laplace.pdf(x, loc=loc, scale=scale)

fd, loc, scale = t.fit(a)
s = t.pdf(x, fd, loc=loc, scale=scale)

plt.plot(x, n, 'k>', x, s, 'r-', x, l, 'b--')
plt.legend(('Gauss', 'Student', 'Laplace'))
save_fig('robustDemoNoOutliers.pdf')

# add the outliers to the sample before re-plotting the histogram
plt.figure()
data_with_outliers = np.hstack((a, outliers))
plt.hist(data_with_outliers, 7, weights=[1 / 33] * 33, rwidth=0.8)
def getNegLogProbNorm(Param):
    avg = Param[0]
    std = Param[1]
    getNegLogProbNorm = -np.sum(np.log(norm.pdf(x=Data, loc=avg, scale=std)))
    return getNegLogProbNorm
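# Hedged usage sketch (not from the original source): minimize the negative
# log-likelihood above to recover the mean and standard deviation of a sample.
# `Data` is the module-level array read by getNegLogProbNorm.
import numpy as np
from scipy.optimize import minimize
from scipy.stats import norm

Data = np.random.normal(loc=2.0, scale=1.5, size=500)
x0 = [np.mean(Data), np.std(Data)]          # start near the sample estimates
result = minimize(getNegLogProbNorm, x0, method="Nelder-Mead")
print(result.x)                             # MLE of (mean, std), close to (2.0, 1.5)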
def editsolvent_removal2(solvent, y_data, x_data, picked_peaks, peak_regions, grouped_peaks, total_params, uc): picked_peaks = np.array(picked_peaks) # define the solvents if solvent == 'chloroform': exp_ppm = [7.26] Jv = [[]] elif solvent == 'dimethylsulfoxide': exp_ppm = [2.50] Jv = [[1.9, 1.9]] elif solvent == 'methanol': exp_ppm = [4.78, 3.31] Jv = [[], [1.7, 1.7]] elif solvent == 'benzene': exp_ppm = [7.16] Jv = [[]] elif solvent == 'pyridine': exp_ppm = [8.74, 7.58, 7.22] Jv = [[], [], []] else: exp_ppm = [] Jv = [[]] # find picked peaks ppm values picked_peaks_ppm = x_data[picked_peaks] # make differences vector for referencing against multiple solvent peaks differences = [] peaks_to_remove = [] solvent_regions = [] # now remove each peak in turn for ind1, speak_ppm in enumerate(exp_ppm): # if only a singlet is expected for this peak find solvent peak based on amplitude and position if len(Jv[ind1]) == 0: probs = norm.pdf(abs(picked_peaks_ppm - speak_ppm), loc=0, scale=0.1) * y_data[picked_peaks] # find the maximum probability w = np.argmax(probs) # append this to the list to remove peaks_to_remove.append(picked_peaks[w]) # append this to the list of differences differences.append(speak_ppm - picked_peaks_ppm[w]) # if the peak displays a splitting pattern then we have to be a bit more selective # do optimisation problem with projected peaks else: amp_res = [] dist_res = [] pos_res = [] # limit the search to peaks +- 1 ppm either side srange = (picked_peaks_ppm > speak_ppm - 1) * (picked_peaks_ppm < speak_ppm + 1) for peak in picked_peaks_ppm[srange]: # print("picked ppm ", peak) fit_s_peaks, amp_vector, fit_s_y = new_first_order_peak(peak, Jv[ind1], np.arange(len(x_data)), 0.1, uc, 1) diff_matrix = np.zeros((len(fit_s_peaks), len(picked_peaks))) for i, f in enumerate(fit_s_peaks): for j, g in enumerate(picked_peaks): diff_matrix[i, j] = abs(f - g) # minimise these distances vertical_ind, horizontal_ind = optimise(diff_matrix) closest_peaks = np.sort(picked_peaks[horizontal_ind]) closest_amps = [] for cpeak in closest_peaks: closest_amps.append(total_params['A' + str(cpeak)]) # find the amplitude residual between the closest peaks and the predicted pattern # normalise these amplitudes amp_vector = [i / max(amp_vector) for i in amp_vector] closest_amps = [i / max(closest_amps) for i in closest_amps] # append to the vector amp_res.append(sum([abs(amp_vector[i] - closest_amps[i]) for i in range(len(amp_vector))])) dist_res.append(np.sum(np.abs(closest_peaks - fit_s_peaks))) pos_res.append(norm.pdf(abs(peak - speak_ppm), loc=0, scale=0.5)) # use the gsd data to find amplitudes of these peaks pos_res = [1 - i / max(pos_res) for i in pos_res] dist_res = [i / max(dist_res) for i in dist_res] amp_res = [i / max(amp_res) for i in amp_res] # calculate geometric mean of metrics for each peak g_mean = [(dist_res[i] + amp_res[i] + pos_res[i]) / 3 for i in range(0, len(amp_res))] # compare the residuals and find the minimum minres = np.argmin(g_mean) # append the closest peaks to the vector fit_s_peaks, amp_vector, fit_s_y = new_first_order_peak(picked_peaks_ppm[srange][minres], Jv[ind1], np.arange(len(x_data)), 0.1, uc, 1) diff_matrix = np.zeros((len(fit_s_peaks), len(picked_peaks))) for i, f in enumerate(fit_s_peaks): for j, g in enumerate(picked_peaks): diff_matrix[i, j] = abs(f - g) # minimise these distances vertical_ind, horizontal_ind = optimise(diff_matrix) closest_peaks = np.sort(picked_peaks[horizontal_ind]) for peak in closest_peaks: ind3 = np.abs(picked_peaks - peak).argmin() 
peaks_to_remove.append(picked_peaks[ind3]) differences.append(picked_peaks_ppm[ind3] - uc.ppm(peak)) # find the region this peak is in and append it to the list for peak in peaks_to_remove: for ind2, region in enumerate(peak_regions): if (peak > region[0]) & (peak < region[-1]): solvent_regions.append(ind2) break # now remove the selected peaks from the picked peaks list and grouped peaks w = np.searchsorted(picked_peaks, peaks_to_remove) picked_peaks = np.delete(picked_peaks, w) for ind4, peak in enumerate(peaks_to_remove): grouped_peaks[solvent_regions[ind4]] = np.delete(grouped_peaks[solvent_regions[ind4]], np.where(grouped_peaks[solvent_regions[ind4]] == peak)) # resimulate the solvent regions solvent_region_ind = sorted(list(set(solvent_regions))) # now need to reference the spectrum # differences = list of differences in ppm found_solvent_peaks - expected_solvent_peaks s_differences = sum(differences) x_data = x_data + s_differences return peak_regions, picked_peaks, grouped_peaks, x_data, solvent_region_ind
#!/usr/bin/env python3
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

plt.style.use('seaborn-darkgrid')

# compute cumulative distribution function (CDF): P(-1.45 < Z < 1.45)
x = -1.45; y = 1.45; loc = 0; scale = 1
pls = norm.cdf(y, loc, scale) - norm.cdf(x, loc, scale)
print("Area under the standard normal curve P(-1.45 < Z < 1.45): {:.5f}".format(pls))

x1 = np.linspace(x, y, 1000); y1 = norm.pdf(x1, loc, scale)
x2 = np.linspace(-4, 4, 1000); y2 = norm.pdf(x2, loc, scale)

ax = plt.figure().add_subplot(111); ax.minorticks_on()
ax.plot(x2, y2, c='r', label='P(-1.45 < Z < 1.45) = {:.5f}'.format(pls))
ax.set(xlabel='X', ylabel='P(X)'); ax.legend(handlelength=0)
ax.fill_between(x1, y1, alpha=0.5, color='r'); ax.set(xlim=[-4, 4])
plt.savefig('norm1f.pdf', dpi=72, bbox_inches='tight'); plt.show()
def test_probit(): for Y, T in [(np.random.binomial(1, 0.5, size=(10, )), np.ones(10)), (np.random.binomial(1, 0.5, size=(10, )), None), (np.random.binomial(3, 0.5, size=(10, )), 3 * np.ones(10))]: X = np.random.standard_normal((10, 5)) for case_weights in [None, np.ones(10)]: L = glm.glm.probit(X, Y, trials=T, case_weights=case_weights) L.smooth_objective(np.zeros(L.shape), 'both') L.hessian(np.zeros(L.shape)) sat_sub = L.saturated_loss.subsample(np.arange( 5)) # check that subsample of saturated loss at least works sat_sub.smooth_objective(np.zeros(sat_sub.shape)) # check that subsample is getting correct answer Xsub = X[np.arange(5)] Ysub = Y[np.arange(5)] if T is not None: Tsub = T[np.arange(5)] T_num = T else: Tsub = np.ones(5) T_num = np.ones(10) beta = np.ones(L.shape) if case_weights is not None: Lsub2 = glm.glm.probit(Xsub, Ysub, trials=Tsub, case_weights=case_weights[np.arange(5)]) Lsub3 = glm.glm.probit(Xsub, Ysub, trials=Tsub, case_weights=case_weights[np.arange(5)]) case_cp = case_weights.copy() * 0 case_cp[np.arange(5)] = 1 Lsub4 = glm.glm.probit(X, Y, trials=T, case_weights=case_cp) else: Lsub2 = glm.glm.probit(Xsub, Ysub, trials=Tsub) Lsub3 = glm.glm.probit(Xsub, Ysub, trials=Tsub) Lsub3.coef *= 2. f2, g2 = Lsub2.smooth_objective(beta, 'both') f3, g3 = Lsub3.smooth_objective(beta, 'both') f4, g4 = Lsub2.smooth_objective(beta, 'both') np.testing.assert_allclose(f3, 2 * f2) np.testing.assert_allclose(g3, 2 * g2) np.testing.assert_allclose(f2, f4) np.testing.assert_allclose(g2, g4) Lcp = copy(L) prev_value = L.smooth_objective(np.zeros(L.shape), 'func') L_sub = L.subsample(np.arange(5)) L_sub.coef *= 45 new_value = L.smooth_objective(np.zeros(L.shape), 'func') assert (prev_value == new_value) np.testing.assert_allclose(L_sub.gradient(beta), 45 * Lsub2.gradient(beta)) linpred = X.dot(beta) np.testing.assert_allclose( L.gradient(beta), X.T.dot(-normal_dbn.pdf(linpred) * (Y / normal_dbn.cdf(linpred) - (T_num - Y) / normal_dbn.sf(linpred)))) linpred = Xsub.dot(beta) np.testing.assert_allclose( L_sub.gradient(beta), 45 * Xsub.T.dot(-normal_dbn.pdf(linpred) * (Ysub / normal_dbn.cdf(linpred) - (Tsub - Ysub) / normal_dbn.sf(linpred)))) # other checks on gradient if T is None: sat = L.saturated_loss np.testing.assert_allclose( sat.smooth_objective(np.zeros(sat.shape), 'grad'), (0.5 - Y) * normal_dbn.pdf(0) / 0.25) np.testing.assert_allclose( L.gradient(np.zeros(L.shape)), X.T.dot(0.5 - Y) * normal_dbn.pdf(0) / 0.25) np.testing.assert_allclose( L.hessian(np.zeros(L.shape)), X.T.dot(X) / 0.25 * normal_dbn.pdf(0)**2) else: L.gradient(np.zeros(L.shape)) L.hessian(np.zeros(L.shape)) L.objective(np.zeros(L.shape)) L.latexify() L.saturated_loss.data = (Y, T) L.saturated_loss.data L.data = (X, (Y, T)) L.data
from scipy.stats import norm from scipy.misc import comb from scipy.stats import beta from scipy import integrate import matplotlib.pyplot as plt import numpy import math import pylab alpha=[5,3,1] # define alpha pars for inverse gamma dist betaa=[5,15,30] # define beta pars for inverse gamma dist y_value=[10,3,3] # number of heads N=[20,10,10] #number of tosses x=numpy.linspace(-0.25,1,100) # intervals for r ( it can't be megative, but negative r is considered for visul purposeis ) for i in range( len(alpha)): a=' Inverse Gamma :alpha={} beta={}'.format(alpha[i],betaa[i]) b=' Laplace Approx :alpha={} beta={}'.format(alpha[i],betaa[i]) real_dist= lambda x : comb(N[i],y_value[i])*(x**y_value[i])*((1-x)**(N[i]-y_value[i]))*beta.pdf(x,alpha[i],betaa[i]) # prior * liklihood marginal_liklihood=integrate.quad(real_dist, x[0], x[-1])[0] # calculating marginal liklihood (i.e, normalization tep) map_r=(y_value[i]+alpha[i]-1)/(alpha[i]+N[i]+betaa[i]-2) # MAP for r print('r={}',format(map_r)) g_2d=((-alpha[i]-y_value[i]+1)/(map_r**2))+((-betaa[i]-N[i]+y_value[i]+1)/(1-map_r)**2) # calculating hessian matrix variance_laplace=-(1/g_2d) plt.plot(x,norm.pdf(x,map_r, math.sqrt(variance_laplace)),label=b) print('sigma={}',format(math.sqrt(variance_laplace))) plt.plot(x,real_dist(x)/marginal_liklihood,label=a) pylab.legend(loc='upper right') #normal_pdf=norm.pdf(x,map_r, math.sqrt(variance_laplace)) # real_dist=comb(N[i],y_value[i])*(x**y_value[i])*((1-x)**(N[i]-y_value[i]))*beta.pdf(x,alpha[i],betaa[i]) plt.xlabel('r value') plt.ylabel('p(r)')
def out(w1, w2):
    err = y0 - (w1 + w2 * x0)
    return norm.pdf(err, loc=0, scale=likelihoodSD)
fig, ax = plt.subplots()
ax.plot(x, y, 'r-', linewidth=2, label='sine function', alpha=0.9)
ax.legend(loc='upper center')
plt.show()

fig, ax = plt.subplots()
ax.plot(x, y, 'r-', linewidth=2, label=r'$y=\sin(x)$', alpha=0.6)
ax.legend(loc='upper center')
ax.set_yticks([-1, -.5, 0, .5, 1])
plt.title('Test plot')
plt.show()

## More plots on one axis
from scipy.stats import norm
from random import uniform

fig, ax = plt.subplots()
x = np.linspace(-4, 4, 150)
for i in range(3):
    print(i)
    m, s = uniform(-1, 1), uniform(1, 2)
    y = norm.pdf(x, loc=m, scale=s)
    current_label = rf'$\mu = {m:.2}$'   # f-string so the legend shows the sampled mean
    ax.plot(x, y, linewidth=2, alpha=0.6, label=current_label)
ax.legend()
plt.show()
def precompute_sensor_model(self):
    print("Precomputing sensor model")
    table_width = int(self.MAX_RANGE_PX) + 1

    # meshgrid of data
    (x, y) = np.meshgrid(np.linspace(0, self.MAX_RANGE_PX, table_width),
                         np.linspace(0, self.MAX_RANGE_PX, table_width))

    # normal along identity
    z = 2 * norm.pdf(x, y, 5)
    # uniform
    z += 2.0 / self.MAX_RANGE_PX
    # ramp
    for row in range(table_width):
        z[row][0:row] += .01 - .01 * np.arange(row, dtype=np.float32) / row
    # max_dist
    z[:, -1:] = .3

    # normalize
    # for i in range(len(z)):
    #     z[i] = z[i] / sum(z[i])
    z /= z.sum(axis=1, keepdims=True)

    # transpose and save; had to use ascontiguousarray for cpython to be happy
    self.sensor_model_table = np.ascontiguousarray(z.T)

    # upload the sensor model to RangeLib for ultra fast resolution later
    self.range_method.set_sensor_model(self.sensor_model_table)

    # code to generate visualizations of the sensor model
    if False:
        # visualize the sensor model
        fig = plt.figure()
        ax = fig.add_subplot(projection='3d')

        # Make data.
        X = np.arange(0, table_width, 1.0)
        Y = np.arange(0, table_width, 1.0)
        X, Y = np.meshgrid(X, Y)

        # Plot the surface.
        surf = ax.plot_surface(X, Y, self.sensor_model_table, cmap="bone",
                               rstride=2, cstride=2, linewidth=0, antialiased=True)

        ax.text2D(0.05, 0.95, "Precomputed Sensor Model", transform=ax.transAxes)
        ax.set_xlabel('Ground truth distance (in px)')
        ax.set_ylabel('Measured Distance (in px)')
        ax.set_zlabel('P(Measured Distance | Ground Truth)')

        plt.show()
    elif False:
        plt.imshow(self.sensor_model_table * 255, cmap="gray")
        plt.show()
    elif False:
        plt.plot(self.sensor_model_table[:, 140])
        plt.plot([139, 139], [0.0, 0.08], label="test")
        plt.ylim(0.0, 0.08)
        plt.xlabel("Measured Distance (in px)")
        plt.ylabel("P(Measured Distance | Ground Truth Distance = 140px)")
        plt.show()
def prob(val, mu, sig, lam):
    p = lam
    for i in range(len(val)):
        p *= norm.pdf(val[i], mu[i], sig[i][i])
    return p
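# Hedged usage sketch (values are illustrative): weighted density of a
# 2-dimensional point under a diagonal-covariance Gaussian. Note that prob()
# reads only the diagonal entries of `sig` and passes them to norm.pdf as scales.
import numpy as np

val = [0.5, -0.2]
mu = [0.0, 0.0]
sig = np.diag([1.0, 2.0])
lam = 0.3                 # component weight multiplying the density
print(prob(val, mu, sig, lam))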
def sigmoid(x): return 1 / (1 + np.exp(-x)) nT = 30 # Temporal filter length nX = 40 # Spatial filter length N = 30 # Number of neurons temp_filt = np.zeros((N, nT)) spat_filt = np.zeros((N, nX)) # Define filters manually for i in range(N - 15): temp_filt[i, :] = norm.pdf(np.linspace(-15, 15, nT), loc=i - 12.5 + i / 1.5, scale=.8) spat_filt[i, :] = norm.pdf( np.linspace(-10, 10, nX), i - 7.5, scale=.8) - .8 * norm.pdf(np.linspace(-10, 10, nX), i - 7.5, 1) for i in range(N - 15, N): temp_filt[i, :] = norm.pdf( np.linspace(-5, 5, nT), loc=.45 * i - 10, scale=.8) - .6 * norm.pdf( np.linspace(-5, 5, nT), loc=.45 * i - 9, scale=.8) spat_filt[i, :] = norm.pdf( np.linspace(-15, 15, nX), loc=1.5 * i - 33, scale=.7) - .5 * norm.pdf( np.linspace(-15, 15, nX), loc=1.5 * i - 33, scale=1.5) bias = -3 - 2 * np.random.uniform(size=N) # Bias print(bias)
for iteration in range(0, 250): # We need to have observed at least 3 items for the model to be able to predict surr_predictions = np.zeros_like(test_index) if iteration > 2 and alpha < 1: surr_estimator.train( np.array(observed_X).astype(float), np.array(observed_y)) mu, var = surr_estimator.predict( np.array(surr_X.iloc[test_index]).astype(float)) mu = mu.reshape(-1) var = var.reshape(-1) sigma = np.sqrt(var) diff = mu - np.max(observed_y) Z = diff / sigma ei = diff * norm.cdf(Z) + sigma * norm.pdf(Z) surr_predictions = ei # surr_predictions = surr_estimator.predict(np.array(surr_X.iloc[test_index]).astype(float)) # print(iteration, "\t", np.std(surr_predictions), "\t", np.std(meta_predictions)) m_corr, m_pvalue = kendalltau(meta_predictions, y[test_index]) s_corr, s_pvalue = kendalltau(surr_predictions, y[test_index]) if s_corr > m_corr: surpassed = iteration if surpassed is None else surpassed # alpha == 0: Only surrogate predictions # alpha == 1: Only meta-model predictions corrected_iteration = np.maximum(1, iteration - 2) # scaled_meta_predictions = MinMaxScaler().fit_transform(meta_predictions.reshape(-1, 1)).reshape(-1)
r_list = np.random.random(N) # list with N random numbers between 0 and 1 for i in range(N): if r_list[i] >= 0.5: part_pos_list[i] += h # One step to the right else: part_pos_list[i] -= h # One step to the left # find standard deviation mu and variance sigma to the normal distribution best suited to part_pos_list mu, sigma = norm.fit(part_pos_list) print("mu =", mu, "sigma =", sigma) # pre-plotting xMax = np.max(np.abs(part_pos_list)) # maximum absolute x position value xRange = (-xMax * 1.1, xMax * 1.1) # Range for the plot xAx = np.linspace(*xRange, 1000) # list og x values for normal distribution p = norm.pdf(xAx, mu, sigma) # normal distribution # plotting savename = "RandomWalkIn1D" fig, ax = plt.subplots(1, 1, num=savename) # new axis for p ax2 = ax.twinx() # Set ax's patch invisible ax.patch.set_visible(False) # move ax in front ax.set_zorder(ax2.get_zorder() + 1) # Histogram, bins is given as the number of possible positions for a particle # distribution=False because True will mess with scaling hist, bins = np.histogram(part_pos_list, bins=(2 * Ntime + 1), density=True,
def EI(mean, std, max_val, tradeoff):
    z = (mean - max_val - tradeoff) / std
    return (mean - max_val - tradeoff) * ndtr(z) + std * norm.pdf(z)
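# Hedged usage sketch (values are illustrative): expected improvement of a
# candidate whose surrogate prediction is mean=1.2, std=0.4, given an incumbent
# best of 1.0. Assumes `ndtr` (scipy.special) and `norm` are imported as used above.
ei = EI(mean=1.2, std=0.4, max_val=1.0, tradeoff=0.01)
print(ei)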
# mean - get_error_conf(sample_size, confidence, std), mean + get_error_conf(sample_size, confidence, std) interval = norm.interval(alpha=confidence, loc=mean, scale=std / np.sqrt(sample_size)) return interval if __name__ == "__main__": """ If the module is called as script, plot the probability density function and the cumulative distribution function. Modified from: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html """ from scipy.stats import norm import matplotlib.pyplot as plt import numpy as np print('Plotting the Probability Density Function' + '\n and the Cumulative Density Function') x = np.linspace(norm.ppf(0.001), norm.ppf(0.999), 100) plt.subplot(1, 2, 1) plt.plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6) plt.title("Probability Density Function") plt.xlabel("norm.ppf(0.001) <= x <= norm.ppf(0.999)") plt.ylabel("norm.pdf(x)") plt.subplot(1, 2, 2) plt.plot(x, norm.cdf(x), 'b-', lw=5, alpha=0.6) plt.title("Cumulative Density Function") plt.xlabel("norm.ppf(0.001) <= x <= norm.ppf(0.999)") plt.ylabel("norm.cdf(x)") plt.show()
def detect_knee(data, window_size=1, s=10): """ Detect the so-called knee in the data. The implementation is based on paper [1] and code here (https://github.com/jagandecapri/kneedle). @param data: The 2d data to find an knee in. @param window_size: The data is smoothed using Gaussian kernel average smoother, this parameter is the window used for averaging (higher values mean more smoothing, try 3 to begin with). @param s: How many "flat" points to require before we consider it a knee. @return: The knee values. """ data_size = len(data) data = np.array(data) if data_size == 1: return None # smooth smoothed_data = [] for i in range(data_size): if 0 < i - window_size: start_index = i - window_size else: start_index = 0 if i + window_size > data_size - 1: end_index = data_size - 1 else: end_index = i + window_size sum_x_weight = 0 sum_y_weight = 0 sum_index_weight = 0 for j in range(start_index, end_index): index_weight = norm.pdf(abs(j - i) / window_size, 0, 1) sum_index_weight += index_weight sum_x_weight += index_weight * data[j][0] sum_y_weight += index_weight * data[j][1] smoothed_x = sum_x_weight / sum_index_weight smoothed_y = sum_y_weight / sum_index_weight smoothed_data.append((smoothed_x, smoothed_y)) smoothed_data = np.array(smoothed_data) # normalize normalized_data = MinMaxScaler().fit_transform(smoothed_data) # difference differed_data = [(x, y - x) for x, y in normalized_data] # find indices for local maximums candidate_indices = [] for i in range(1, data_size - 1): if (differed_data[i - 1][1] < differed_data[i][1]) and ( differed_data[i][1] > differed_data[i + 1][1]): candidate_indices.append(i) # threshold step = s * (normalized_data[-1][0] - data[0][0]) / (data_size - 1) # knees knee_indices = [] for i in range(len(candidate_indices)): candidate_index = candidate_indices[i] if i + 1 < len(candidate_indices): # not last second end_index = candidate_indices[i + 1] else: end_index = data_size threshold = differed_data[candidate_index][1] - step for j in range(candidate_index, end_index): if differed_data[j][1] < threshold: knee_indices.append(candidate_index) break if knee_indices != []: return knee_indices #data[knee_indices] else: return None
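# Hedged usage sketch (not from the original source): locate the knee of a
# saturating curve. Assumes numpy as `np` plus the imports detect_knee relies
# on (scipy.stats.norm and sklearn's MinMaxScaler) are available.
import numpy as np

xs = np.linspace(0, 10, 100)
ys = 1.0 - np.exp(-xs)                    # curve that flattens out after x ~ 2
curve = list(zip(xs, ys))
knees = detect_knee(curve, window_size=3, s=10)
if knees is not None:
    print([curve[i][0] for i in knees])   # x-positions of the detected knee(s)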
def gaussian_ei(X, model, y_opt=0.0, xi=0.01, return_grad=False): """ Use the expected improvement to calculate the acquisition values. The conditional probability `P(y=f(x) | x)`form a gaussian with a certain mean and standard deviation approximated by the model. The EI condition is derived by computing ``E[u(f(x))]`` where ``u(f(x)) = 0``, if ``f(x) > y_opt`` and ``u(f(x)) = y_opt - f(x)``, if``f(x) < y_opt``. This solves one of the issues of the PI condition by giving a reward proportional to the amount of improvement got. Note that the value returned by this function should be maximized to obtain the ``X`` with maximum improvement. Parameters ---------- * `X` [array-like, shape=(n_samples, n_features)]: Values where the acquisition function should be computed. * `model` [sklearn estimator that implements predict with ``return_std``]: The fit estimator that approximates the function through the method ``predict``. It should have a ``return_std`` parameter that returns the standard deviation. * `y_opt` [float, default 0]: Previous minimum value which we would like to improve upon. * `xi`: [float, default=0.01]: Controls how much improvement one wants over the previous best values. Useful only when ``method`` is set to "EI" * `return_grad`: [boolean, optional]: Whether or not to return the grad. Implemented only for the case where ``X`` is a single sample. Returns ------- * `values`: [array-like, shape=(X.shape[0],)]: Acquisition function values computed at X. """ with warnings.catch_warnings(): warnings.simplefilter("ignore") if return_grad: mu, std, mu_grad, std_grad = model.predict(X, return_std=True, return_mean_grad=True, return_std_grad=True) else: mu, std = model.predict(X, return_std=True) values = np.zeros_like(mu) mask = std > 0 improve = y_opt - xi - mu[mask] scaled = improve / std[mask] cdf = norm.cdf(scaled) pdf = norm.pdf(scaled) exploit = improve * cdf explore = std[mask] * pdf values[mask] = exploit + explore if return_grad: if not np.all(mask): return values, np.zeros_like(std_grad) # Substitute (y_opt - xi - mu) / sigma = t and apply chain rule. # improve_grad is the gradient of t wrt x. improve_grad = -mu_grad * std - std_grad * improve improve_grad /= std**2 cdf_grad = improve_grad * pdf pdf_grad = -improve * cdf_grad exploit_grad = -mu_grad * cdf - pdf_grad explore_grad = std_grad * pdf + pdf_grad grad = exploit_grad + explore_grad return values, grad return values
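# Hedged usage sketch (not from the original source): score candidate points
# with gaussian_ei using scikit-learn's GaussianProcessRegressor, whose
# predict(..., return_std=True) matches the interface assumed above. The
# gradient path (return_grad=True) needs mean/std gradients and is not shown;
# the modules gaussian_ei itself uses (warnings, scipy.stats.norm) are assumed
# to be imported alongside it.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

rng = np.random.RandomState(0)
X_obs = rng.uniform(-2, 2, size=(8, 1))
y_obs = np.sin(3 * X_obs).ravel()

gpr = GaussianProcessRegressor().fit(X_obs, y_obs)
X_cand = np.linspace(-2, 2, 50).reshape(-1, 1)
ei = gaussian_ei(X_cand, gpr, y_opt=y_obs.min())
print(X_cand[np.argmax(ei)])   # candidate with the largest expected improvement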
b: upper bound of the interval of test values n: number of test values name: name of the test to be printed by the test program c_fn_name: name of the C function being tested args: additional arguments of the C function being tested """ TestParams = namedtuple('TestParams', 'a b n f name c_fn_name args'.split()) test_params_list = [ TestParams(-1, 2, 100, uniform.pdf, 'uniform(0, 1) pdf', 'uniform_pdf', (0, 1)), TestParams(-1, 2, 100, uniform.cdf, 'uniform(0, 1) cdf', 'uniform_cdf', (0, 1)), TestParams(-10, 10, 1000, norm.pdf, 'gaussian(0, 1) pdf', 'gaussian_pdf', (0, 1)), TestParams(-10, 20, 1000, lambda x: norm.pdf(x, 10, 2**.5), 'gaussian(10, 2) pdf', 'gaussian_pdf', (10, 2)), TestParams(-10, 10, 1000, norm.cdf, 'gaussian(0, 1) cdf', 'gaussian_cdf', (0, 1)), TestParams(-10, 20, 1000, lambda x: norm.cdf(x, 10, 2**.5), 'gaussian(10, 2) cdf', 'gaussian_cdf', (10, 2)), TestParams(-1, 10, 1000, lambda x: gamma.pdf(x, 1), 'gamma(1, 1) pdf', 'gamma_pdf', (1, 1)), TestParams(-1, 10, 1000, lambda x: gamma.pdf(x, 1.25, 0, 1), 'gamma(1.25, 1) pdf', 'gamma_pdf', (1.25, 1)), TestParams(-1, 10, 1000, lambda x: gamma.pdf(x, 1.25, 0, 1 / 2), 'gamma(1.25, 2) pdf', 'gamma_pdf', (1.25, 2)), TestParams(-1, 10, 1000, lambda x: gamma.cdf(x, 1), 'gamma(1, 1) cdf', 'gamma_cdf', (1, 1)), TestParams(-1, 10, 1000, lambda x: gamma.cdf(x, 1.25, 0, 1), 'gamma(1.25, 1) cdf', 'gamma_cdf', (1.25, 1)),
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

x = np.linspace(-4, 4, num=100)

fig = plt.figure(figsize=(8, 5))
ax = fig.add_subplot()

ax.plot(x, norm.pdf(x, loc=-1, scale=1), color="magenta")
ax.plot(x, norm.pdf(x, loc=0, scale=1), color=(0.85, 0.64, 0.12))
ax.plot(x, norm.pdf(x, loc=1, scale=1), color="#228B22")

plt.savefig('colours.svg', bbox_inches='tight')
plt.show()