def mse_exp(theoretical_distribution, estimated_distribution):
    theoretical_lambda = theoretical_distribution[1]
    theoretical_scale = 1 / theoretical_lambda
    estimated_lambda = estimated_distribution[1]
    estimated_scale = 1 / estimated_lambda
    linspace = np.linspace(expon.ppf(0.001, scale=theoretical_scale),
                           expon.ppf(0.999, scale=theoretical_scale), 1000)
    theoretical_pdf = expon.pdf(linspace, scale=theoretical_scale)
    estimated_pdf = expon.pdf(linspace, scale=estimated_scale)
    mse_pdf = mean_squared_error(theoretical_pdf, estimated_pdf)
    theoretical_cdf = expon.cdf(linspace, scale=theoretical_scale)
    estimated_cdf = expon.cdf(linspace, scale=estimated_scale)
    mse_cdf = mean_squared_error(theoretical_cdf, estimated_cdf)
    theoretical_reliability = 1 - expon.cdf(linspace, scale=theoretical_scale)
    estimated_reliability = 1 - expon.cdf(linspace, scale=estimated_scale)
    mse_reliability = mean_squared_error(theoretical_reliability, estimated_reliability)
    return [mse_pdf, mse_cdf, mse_reliability]
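# Hypothetical usage sketch for mse_exp above: the distribution arguments are
# assumed to be indexable with the rate at position 1, since the function reads
# lambda from index [1]. The names here are illustrative only.
import numpy as np
from scipy.stats import expon
from sklearn.metrics import mean_squared_error

theoretical = ("expon", 0.5)  # true rate lambda = 0.5
sample = expon.rvs(scale=2.0, size=1000)
estimated = ("expon", 1.0 / np.mean(sample))  # MLE of the rate
mse_pdf, mse_cdf, mse_rel = mse_exp(theoretical, estimated)
print(mse_pdf, mse_cdf, mse_rel)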
def objects_walls_algorithm(cmd, world, k1=4.2, k2=4.4):
    x, y = np.mgrid[0:world.xdim:.1, 0:world.ydim:.1]

    # Calculate naive distribution
    naive_dist = naive_algorithm(cmd, world)
    naive_vals = naive_dist.pdf(np.dstack((x, y)))

    # Find distance to closest object
    ref_dists = {ref: np.sqrt((x - ref.center[0])**2 + (y - ref.center[1])**2)
                 for ref in world.references}
    # np.dstack needs a sequence, not a generator
    min_ref_dists = np.min(np.dstack([ref_dists[ref] for ref in ref_dists]), axis=2)

    # Difference between distance to closest object and object reference in command
    ref_distance_diff = ref_dists[cmd.reference] - min_ref_dists
    ref_distance_vals = expon.pdf(ref_distance_diff, scale=k1)

    # Find distance to nearest wall
    min_wall_dists = np.min(np.dstack((x, y, world.xdim - x, world.ydim - y)), axis=2)

    # Difference between distance to closest wall and object reference in command
    wall_distance_diff = ref_dists[cmd.reference] - min_wall_dists
    wall_distance_diff[wall_distance_diff < 0] = 0
    wall_distance_vals = expon.pdf(wall_distance_diff, scale=k2)

    mean_prob = naive_vals*ref_distance_vals*wall_distance_vals
    loc = np.where(mean_prob == mean_prob.max())
    mean = 0.1*np.array([loc[0][0], loc[1][0]])
    mv_dist = multivariate_normal(mean, naive_dist.cov)
    return mv_dist
def rayleigh_noise_signal_error(bins, param_s1, param_s2, param_n1, param_n2, flag):
    pdf_fitted1 = expon.pdf(bins, loc=param_s1, scale=param_s2)
    pdf_fitted2 = expon.pdf(bins, loc=param_n1, scale=param_n2)
    mean_square_err = mean_squared_error(pdf_fitted1, pdf_fitted2)
    root_mean_square_err = sqrt(mean_square_err)
    return root_mean_square_err
def plot_kde(true_dist, num_samples, kernel_function):
    """
    Visualize kernel density estimates using both bandwidth obtained from
    plugin method and cross validation
    """
    x_values = np.array([])
    if true_dist == 'exp':
        x_values = np.random.exponential(1, num_samples)
    elif true_dist == 'norm':
        x_values = np.random.normal(0, 1, num_samples)
    x = np.arange(min(x_values), max(x_values), .01)
    h_opt = calculate_optimum_bandwidth(x_values, kernel_function)
    h_cv = estimate_bandwidth(x, gaussian_pdf, true_dist)
    fig = plt.figure()

    # plugin optimal bandwidth
    ax = fig.add_subplot(2, 2, 1)
    dist_h_opt = kde_pdf(x_values, kernel_func=kernel_function, bandwidth=h_opt)
    y = [dist_h_opt(i) for i in x]
    ys = [dist_h_opt(i) for i in x_values]
    ax.scatter(x_values, ys)
    if true_dist == 'exp':
        ax.plot(x, expon.pdf(x))
    elif true_dist == 'norm':
        ax.plot(x, norm.pdf(x, 0, 1))
    ax.plot(x, y)

    # bandwidth chosen from cross validation
    ax1 = fig.add_subplot(2, 2, 2)
    dist_h_cv = kde_pdf(x_values, kernel_func=kernel_function, bandwidth=h_cv)
    y1 = [dist_h_cv(i) for i in x]
    ys1 = [dist_h_cv(i) for i in x_values]
    ax1.scatter(x_values, ys1)
    if true_dist == 'exp':
        ax1.plot(x, expon.pdf(x))
    elif true_dist == 'norm':
        ax1.plot(x, norm.pdf(x, 0, 1))
    ax1.plot(x, y1)

    # display gridlines
    ax.grid(True)
    ax1.grid(True)

    # display legend in each subplot
    leg4 = mpatches.Patch(color=None, label=f'plug-in bandwidth={h_opt}')
    leg5 = mpatches.Patch(color=None, label=f'cross-validated bandwidth={h_cv}')
    ax.legend(handles=[leg4])
    ax1.legend(handles=[leg5])
    plt.tight_layout()
    plt.show()
def generate_coefs(index, columns):
    simulated_coefs_df = pd.DataFrame(0, index=index, columns=columns)
    # get the indices of each group of features
    ind_demo = [columns.index(col) for col in columns if "demo" in col]
    ind_proxy = [columns.index(col) for col in columns if "proxy" in col]
    ind_investment = [columns.index(col) for col in columns if "investment" in col]
    for i in range(7):
        outcome_name = simulated_coefs_df.index[i]
        if "proxy" in outcome_name:
            ind_same_proxy = [ind for ind in ind_proxy if outcome_name in columns[ind]]
            # print(ind_same_proxy)
            random_proxy_name = np.random.choice(
                [proxy for proxy in index[:4] if proxy != outcome_name])
            ind_random_other_proxy = [
                ind for ind in ind_proxy if random_proxy_name in columns[ind]
            ]
            # demo
            simulated_coefs_df.iloc[
                i, np.random.choice(ind_demo, 2)] = np.random.uniform(0.004, 0.05)
            # same proxy
            simulated_coefs_df.iloc[i, ind_same_proxy] = sorted(
                np.random.choice(expon.pdf(np.arange(10)) * 5e-1, 6, replace=False))
            simulated_coefs_df.iloc[i, ind_random_other_proxy] = sorted(
                np.random.choice(expon.pdf(np.arange(10)) * 5e-2, 6, replace=False))
        elif "investment" in outcome_name:
            ind_same_invest = [
                ind for ind in ind_investment if outcome_name in columns[ind]
            ]
            random_proxy_name = np.random.choice(index[:4])
            ind_random_other_proxy = [
                ind for ind in ind_proxy if random_proxy_name in columns[ind]
            ]
            simulated_coefs_df.iloc[
                i, np.random.choice(ind_demo, 2)] = np.random.uniform(0.001, 0.05)
            simulated_coefs_df.iloc[i, ind_same_invest] = sorted(
                np.random.choice(expon.pdf(np.arange(10)) * 5e-1, 6, replace=False))
            simulated_coefs_df.iloc[i, ind_random_other_proxy] = sorted(
                np.random.choice(expon.pdf(np.arange(10)) * 1e-1, 6, replace=False))
    return simulated_coefs_df
def testScipyExponential():
    data0 = expon.rvs(scale=10, size=1000)
    ###################
    data = data0
    plt.figure()
    x = np.linspace(0, 100, 100)
    plt.hist(data, bins=x, density=True)  # `normed` was removed from matplotlib
    plt.plot(x, expon.pdf(x, loc=0, scale=10), color='g')
    #loc, scale = expon.fit(data, floc=0)
    #plt.plot(x, expon.pdf(x, loc=loc, scale=scale), color='r')

    removedHeadLength = 1.0
    dataNoHead = [v for v in data if v > removedHeadLength]
    loc1, scale1 = expon.fit(dataNoHead)
    plt.plot(x, expon.pdf(x, loc=0, scale=scale1), color='b')
    loc, scale = expon.fit(dataNoHead, floc=removedHeadLength)
    plt.plot(x, expon.pdf(x, loc=0, scale=scale), color='r')

    # non-normalized graphs
    # plt.figure()
    # plt.hist(data0, bins=x, density=False)
    # plt.plot(x, expon.pdf(x, loc=0, scale=10)*len(data0), color='r')

    plt.figure()
    plt.hist(dataNoHead, bins=x, density=False)
    # s = len(dataNoHead) / sInvNormalisation = integral(pdf, removedHeadLength, infty)
    int_0_removedHeadLength_expon = np.exp(- float(removedHeadLength) / scale)
    s = len(dataNoHead) / int_0_removedHeadLength_expon
    s0 = len(data0) / 1.0
    print(s0, len(dataNoHead), s, file=sys.stderr)
    plt.plot(x, expon.pdf(x, loc=0, scale=scale)*s, color='r')

    ##############################################################
    # deprecated
    ##############################################################
    # non-linear fit
    #A, K, C = fit_exp_nonlinear(t, noisy)
    # linear fit with the constant set to 0
    # C = 0
    # A, K = fit_exp_linear(t, noisy, C)
    # ysModel = model_func(t, A, K, C)

    #plt.tight_layout()
    #plt.xlim(0, 100)
    #plt.title("OSB length distribution in Human-Mouse comparison \n Confidence Interval : %s*sigma around mean" % arguments["ICfactorOfSigma"])
    #plt.title("")
    #
    #plt.legend()
    #plt.savefig(sys.stdout, format='svg')
def error_calculation(bins, param_s1, param_s2, param_n1, param_n2, flag):
    if flag == 1:
        pdf_fitted1 = rayleigh.pdf(bins, loc=param_s1, scale=param_s2)  # fitted distribution - rayleigh
        pdf_fitted2 = rayleigh.pdf(bins, loc=param_n1, scale=param_n2)  # fitted distribution - rayleigh
    elif flag == 0:
        pdf_fitted1 = expon.pdf(bins, loc=param_s1, scale=param_s2)  # fitted distribution - exponential
        pdf_fitted2 = expon.pdf(bins, loc=param_n1, scale=param_n2)  # fitted distribution - exponential
    l1_norm = 0
    mean_square_err = mean_squared_error(pdf_fitted1, pdf_fitted2)
    l1_norm = sum(abs(pdf_fitted1 - pdf_fitted2))
    root_mean_square_err = sqrt(mean_square_err)
    mean_l1_norm = 1.0*l1_norm/len(pdf_fitted1)
    print("Root Mean Square Error= ", root_mean_square_err)
    print("Mean L1 norm= ", mean_l1_norm)
def main():
    src_path_map = '../data/map/wean.dat'
    map_obj = MapReader(src_path_map)
    occupancy_map = map_obj.get_map()
    sensor_model = SensorModel(occupancy_map)

    particleMeasurement = 500
    probabilities = np.zeros(1000)
    index = 0
    for actualMeasurement in range(1000):
        probabilities[index] = sensor_model.calculateProbability(
            actualMeasurement, particleMeasurement)
        index += 1
    plotProbabilities(probabilities)

    numSamples = 1000
    stdev = 100
    gaussPDF = signal.gaussian(numSamples * 2, std=stdev)
    #plotProbabilities(gaussPDF)

    # my bin-based version
    # Makes numSamples samples spanning quantiles .01 to .99
    x = np.linspace(expon.ppf(0.01), expon.ppf(0.99), numSamples)
    expPDF = expon.pdf(x)
def returnDistData(cls, self):
    gammaParam = gamma.fit(10**(self.data / 10))
    gammaDist = gamma.pdf(self.data, *gammaParam)

    rayleighParam = rayleigh.fit(self.data)
    rayleighDist = rayleigh.pdf(self.data, *rayleighParam)

    normParam = norm.fit(self.data)
    normDist = norm.pdf(self.data, *normParam)

    logNormParam = lognorm.fit(self.data)
    lognormDist = lognorm.pdf(self.data, *logNormParam)

    nakagamiParam = nakagami.fit(self.data)
    nakagamiDist = nakagami.pdf(self.data, *nakagamiParam)

    exponParam = expon.fit(self.data)
    exponDist = expon.pdf(self.data, *exponParam)

    exponweibParam = exponweib.fit(self.data)
    weibDist = exponweib.pdf(self.data, *exponweibParam)

    distDF = pd.DataFrame(np.column_stack([
        gammaDist, rayleighDist, normDist, lognormDist,
        nakagamiDist, exponDist, weibDist
    ]), columns=[
        'gammaDist', 'rayleighDist', 'normDist', 'lognormDist',
        'nakagamiDist', 'exponDist', 'weibDist'
    ])
    self.distDF = distDF
def main():
    symbol = 'BTCUSDT'
    #symbols = ['BTCUSDT', 'ETHUSDT', 'LTCUSDT', 'ETHBTC', 'LTCBTC', 'LTCETH']
    #symbols = ['ETHUSDT', 'LTCUSDT', 'ETHBTC', 'LTCBTC', 'LTCETH']
    #trades = get_trades('BTCUSDT', datetime.datetime.timestamp(datetime.datetime(2019, 6, 1)) * 1000, 24 * 365)
    trades = get_trades(
        symbol,
        datetime.datetime.timestamp(datetime.datetime(2019, 6, 1)) * 1000,
        1200)
    with open(symbol + '.json', 'w') as f:
        json.dump(trades, f)
    previous_time = None
    interarrival_times = []
    for trade in trades:
        time = trade['time']
        if previous_time is not None:
            interarrival_times.append(time - previous_time)
        previous_time = time
    plt.hist(interarrival_times, 100, density=True)
    loc, scale = expon.fit(interarrival_times, loc=0)
    x = np.linspace(0, 2000, 100)
    plt.plot(x, expon.pdf(x, loc=loc, scale=scale))
    plt.show()
def plot_int(self, loc, scale):
    bins = np.arange(min(self.int_his), max(self.int_his), self.delta_t)
    plt.hist(self.int_his, bins, color="turquoise", density=True, lw=0)  # `normed` was removed from matplotlib
    plt.xlabel("interspike interval (s)")
    y = expon.pdf(bins, loc, scale)
    plt.plot(bins, y, "--", color="darkblue")
    plt.margins(0, None)
def plot_histogram(data_sets, attribute_names, range, fit_dist='', ylabel='', title='',
                   bins=20, normed=1, histtype='bar', facecolor='#0099FF', log=0):
    data_to_plot = retrieve_attributes_by_name(data_sets, attribute_names)
    n, bins, patches = plt.hist(data_to_plot[0], bins=bins, range=range, density=normed,
                                histtype=histtype, facecolor='#0099FF', log=0)
    # string comparison with `is` only works by accident; use `==`
    if fit_dist == 'normal':
        mu, sigma = data_to_plot.mean(), data_to_plot.std()  # sample mean and sample standard deviation
        y = norm.pdf(bins, mu, sigma)  # the corresponding distribution (mlab.normpdf was removed from matplotlib)
        l = plt.plot(bins, y, 'r--', linewidth=1)  # plot the distribution
    if fit_dist == 'uniform':
        uniform_pdf = 1 / (range[1] - range[0])  # U_pdf(x) := 1 / (b - a), horizontal line with (a, b)=range
        l = plt.plot(range, (uniform_pdf, uniform_pdf), 'r-', linewidth=2)  # plot the uniform pdf
    if fit_dist == 'exponential':
        k = data_to_plot.mean()  # the sample mean is the MLE estimate of the scale (1/lambda) of the exp. dist.
        x = np.linspace(expon.ppf(0.01, scale=k), expon.ppf(0.99, scale=k), 100)
        l = plt.plot(x, expon.pdf(x, scale=k), 'r--', linewidth=1)
    if fit_dist == 'lognormal':
        data_to_plot[np.where(data_to_plot == 0)] = 0.0001
        x = np.log(data_to_plot)
        print(x)
        mu, sigma = x.mean(), x.std()
        print('%g %g' % (mu, sigma))
        y = lognorm.pdf(x=bins, s=sigma, loc=mu, scale=math.exp(mu))
        l = plt.plot(bins, y, 'r--', linewidth=1)
    plt.ylabel(ylabel)
    plt.xlabel(attribute_names)
    plt.title(title)
    plt.grid(True)
    plt.show()
def pdf(self, x: float):
    """Find the PDF for a certain x value.

    Args:
        x (float): The value for which the PDF is needed.
    """
    return expon.pdf(x, scale=self.scale)
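# Minimal sketch of how a method like the one above might be exercised; the
# wrapper class name and constructor are assumptions for illustration.
from scipy.stats import expon

class ExponentialDistribution:
    def __init__(self, scale: float):
        self.scale = scale

    def pdf(self, x: float):
        return expon.pdf(x, scale=self.scale)

dist = ExponentialDistribution(scale=2.0)
print(dist.pdf(1.0))  # 0.5 * exp(-0.5) ~ 0.3033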
def testExponOneEvent(self):
    """
    generate and fit an exponential distribution with lifetime of 25
    make a plot in testExpon.png
    """
    tau = 25.0
    nBins = 400
    size = 100
    x = range(nBins)
    timeHgValues = np.zeros(nBins, dtype=np.int64)
    timeStamps = expon.rvs(loc=0, scale=tau, size=size)
    ts64 = timeStamps.astype(np.uint64)
    tsBinner.tsBinner(ts64, timeHgValues)
    param = expon.fit(timeStamps)
    fit = expon.pdf(x, loc=param[0], scale=param[1])
    fit *= size
    tvf = timeHgValues.astype(np.double)
    tvf[tvf < 1] = 1e-3  # the plot looks nicer if zero values are replaced
    plt.plot(x, tvf, label="data")
    plt.plot(x, fit, label="fit")
    plt.yscale('log')
    plt.xlim(right=100)   # `xmax`/`ymin` kwargs were renamed in matplotlib
    plt.ylim(bottom=0.09)
    plt.legend()
    plt.title("true tau=%.1f fit tau=%.1f" % (tau, param[1]))
    plt.savefig(inspect.stack()[0][3] + ".png")
def func_2b11(repeat_times, sample_number):
    result = [0] * repeat_times
    result_mean = 0
    result_variance = 0
    lamda = 1.0 / (np.log(function1(0.8) / function1(1.8)))
    envelope_size = expon.cdf(3, loc=0, scale=lamda) - expon.cdf(0.8, loc=0, scale=lamda)
    for i in range(0, repeat_times):
        for j in range(0, sample_number):
            x = rd.uniform(0.8, 3)
            result[i] += (function1(x) * envelope_size) / (
                expon.pdf(x, loc=0, scale=lamda) * sample_number)
        result_mean += result[i] / repeat_times
    for i in range(0, repeat_times):
        result_variance += (result[i] - result_mean)**2
    result_variance /= repeat_times
    print("The Variance of the 50 samples is ", result_variance)
    print("The average of this", repeat_times, "samples is: ", result_mean)
    plt.scatter(np.arange(0, repeat_times), result)
    plt.title(
        "Function 1 with Monte Carlo Estimation Imported with Importance Sampling")
    plt.xlabel("Trial")
    plt.ylabel("Estimation Result")
    plt.grid(True)
    plt.show()
    print("\n\n\n\n")
def testExpon(self):
    """
    generate and fit an exponential distribution with lifetime of 25
    make a plot in testExpon.png
    """
    tau = 25.0
    nBins = 400
    size = 100
    x = range(nBins)
    timeHgValues = np.zeros(nBins, dtype=np.int64)
    timeStamps = expon.rvs(loc=0, scale=tau, size=size)
    ts64 = timeStamps.astype(np.uint64)
    tsBinner.tsBinner(ts64, timeHgValues)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Note: this line causes a RuntimeWarning in optimize.py:301
        param = expon.fit(timeStamps)
    fit = expon.pdf(x, loc=param[0], scale=param[1])
    fit *= size
    tvf = timeHgValues.astype(np.double)
    #tvf[tvf<1] = 1e-3  # the plot looks nicer if zero values are replaced
    plt.plot(x, tvf, label="data")
    plt.plot(x, fit, label="fit")
    plt.yscale('symlog', linthresh=0.9)  # `linthreshy` was renamed in matplotlib
    plt.xlim(right=100)
    plt.ylim(bottom=-0.1)
    plt.legend()
    plt.title("true tau=%.1f fit tau=%.1f" % (tau, param[1]))
    plt.savefig(inspect.stack()[0][3] + ".png")
def C(A):
    m = mean(A)
    if m == 0:
        m = 1
    likelihood = expon.pdf(A, 0, m)
    return -2 * sum([log(i) for i in likelihood])
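# Sanity check for C above (assuming A is a positive 1-D array): at the MLE
# scale m = mean(A), the exponential log-likelihood is -n*log(m) - n, so C(A)
# should equal 2*n*(log(m) + 1).
import numpy as np

rng = np.random.default_rng(0)
A = rng.exponential(scale=3.0, size=500)
n, m = len(A), A.mean()
assert np.isclose(C(A), 2 * n * (np.log(m) + 1))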
def expon_dcdf(x, d, scale=1):
    """
    d^th derivative of the cumulative distribution function at x of the given RV.

    :param x: array_like
        quantiles
    :param d: positive integer
        derivative order of the cumulative distribution function
    :param scale: positive number
        scale parameter (default=1)
    :return: array_like
        If d = 0: the cumulative distribution function evaluated at x
        If d = 1: the probability density function evaluated at x
        If d => 2: the (d-1)-density derivative evaluated at x
    """
    if d < 0 or not isinstance(d, int):
        print("D must be a non-negative integer.")
        return float('nan')
    if d == 0:
        output = expon.cdf(x, scale=scale)
    if d >= 1:
        output = ((-1/scale) ** (d - 1)) * expon.pdf(x, scale=scale)
    return output
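# Quick finite-difference check of the derivative orders returned by
# expon_dcdf (a sketch; x is taken in the smooth region x > 0).
import numpy as np

x, h, scale = 1.3, 1e-5, 2.0
# d=1 should match the slope of the d=0 output (pdf = cdf')
fd1 = (expon_dcdf(x + h, 0, scale) - expon_dcdf(x - h, 0, scale)) / (2 * h)
assert np.isclose(fd1, expon_dcdf(x, 1, scale))
# d=2 should match the slope of the pdf
fd2 = (expon_dcdf(x + h, 1, scale) - expon_dcdf(x - h, 1, scale)) / (2 * h)
assert np.isclose(fd2, expon_dcdf(x, 2, scale))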
def testExponManyEvents(self):
    """
    generate and fit an exponential distribution with lifetime of 25
    make a plot in testExponManyEvents.png
    """
    tau = 25.0
    nBins = 400
    size = 100
    taulist = []
    for i in range(1000):
        x = range(nBins)
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size)
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)
        param = expon.fit(timeStamps)
        fit = expon.pdf(x, loc=param[0], scale=param[1])
        fit *= size
        print("i=", i, " param[1]=", param[1])
        taulist.append(param[1])
    hist, bins = np.histogram(taulist, bins=20, range=(15, 25))
    width = 0.7*(bins[1]-bins[0])
    center = (bins[:-1]+bins[1:])/2
    plt.step(center, hist, where='post')
    plt.savefig(inspect.stack()[0][3] + ".png")
def testExponaverage(self):
    """
    generate and fit a histogram of an exponential distribution with many events
    using the average time to find the fit. The histogram is then saved in
    testExponaverage.png
    """
    tau = 25.0
    nBins = 400
    size = 100
    taulist = []
    for i in range(1000):
        x = range(nBins)
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size)
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)
        param = sum(timeStamps)/len(timeStamps)
        # the sample mean estimates the scale parameter; passed positionally
        # it would be taken as `loc`
        fit = expon.pdf(x, scale=param)
        fit *= size
        taulist.append(param)
    hist, bins = np.histogram(taulist, bins=20, range=(15, 35))
    width = 0.7*(bins[1]-bins[0])
    center = (bins[:-1]+bins[1:])/2
    #plt.bar(center, hist, align='center', width=width)  # produces bar graph
    plt.step(center, hist, where='post')
    plt.savefig(inspect.stack()[0][3] + ".png")
def plot(p2D, pens, p2De, para, params_exp, thickness, tempres):
    #print(tempres)
    plt.subplot(511)
    plt.hist(p2D[:, 5], bins=20, density=True)
    x = np.linspace(0, 5, 80)
    if para is not None:
        plt.plot(x, maxwell.pdf(x, *para), 'r', x, maxwell.cdf(x, *para), 'g')
    plt.subplot(512)
    plt.hist(pens, bins=20, density=True)
    z = np.linspace(0, 500, 200)
    plt.xlim(0, 2*thickness)
    plt.plot(z, expon.pdf(z, *params_exp), 'r')
    #plt.plot(z, expon.pdf(z, *params_exp), 'r', z, expon.cdf(z, *params_exp), 'g')
    plt.subplot(513)
    plt.xlim(0, thickness+1)
    plt.plot(p2D[:, 0], p2D[:, 1], 'b.')
    plt.subplot(514)
    plt.ylim(-1*10**6, 1*10**6)
    plt.xlim(0, thickness+1)
    plt.plot(p2De[:, 0], p2De[:, 1], 'b.')
    plt.subplot(515)
    plt.plot(tempres[:, 0], tempres[:, 1], 'r-')
    plt.savefig("plot.png")  # plt.savefig() requires a filename; "plot.png" is a placeholder
    plt.show()
    return
def fit(x, maxiter=100, l=1, pi=0.5):
    mu = x.mean()
    sigma = x.std()
    for _ in range(maxiter):
        if pi < EPS or l < EPS:
            l = 1
            pi = 0
            mu = x.mean()
            sigma = x.std()
            tau = np.zeros(shape=x.shape)
            break
        if 1 - pi < EPS or sigma < EPS:
            pi = 1
            mu = 0
            sigma = 1
            l = x.mean()
            tau = np.ones(shape=x.shape)
            break
        p_expon = pi * expon.pdf(x, scale=l)
        p_norm = (1 - pi) * norm.pdf(x, loc=mu, scale=sigma)
        tau = p_expon / (p_expon + p_norm)
        pi, oldpi = tau.mean(), pi
        l = (x * tau).sum() / tau.sum()
        mu = (x * (1 - tau)).sum() / (1 - tau).sum()
        diffs = x - mu
        sigma = np.sqrt((diffs * diffs * (1 - tau)).sum() / (1 - tau).sum())
        if abs(pi - oldpi) < EPS:
            break
    return tau, pi, l, mu, sigma
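# Usage sketch for the EM routine above on synthetic data (assumptions: a
# module-level EPS such as EPS = 1e-6, and expon/norm already imported as in
# the snippet). Half the points come from Expon(scale=2), half from N(5, 1).
import numpy as np

EPS = 1e-6
rng = np.random.default_rng(1)
x = np.concatenate([rng.exponential(2.0, 500), rng.normal(5.0, 1.0, 500)])
tau, pi, l, mu, sigma = fit(x)
print("pi=%.2f l=%.2f mu=%.2f sigma=%.2f" % (pi, l, mu, sigma))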
def plot_exponential_distribution(x, _lambda):
    scale = 1 / _lambda
    plt.plot(x, expon.pdf(x, scale=scale), lw=3, alpha=0.7,
             label=r'$\lambda$ = %s' % _lambda)
def get_skewed_random_sample(self, n, slope=-1.0):
    """
    Randomly choose an index from an array of some given size using a scaled
    inverse exponential.

    n: length of array
    slope: (float) determines steepness of the probability distribution
        -1.0 by default for slightly uniform probabilities skewed towards the left
        < -1.0 makes it more steep and > -1.0 makes it flatter
        slope = -n generates an approximately uniform distribution
    """
    inv_l = 1.0 / (n**float(slope))  # 1/lambda
    x = np.array([i for i in range(0, n)])  # list of indices
    # generate inverse exponential distribution using the indices and the inverse of lambda
    p = expon.pdf(x, scale=inv_l)
    # generate uniformly distributed random number and weigh it by total sum of pdf from above
    rand = np.random.random() * np.sum(p)
    for i, p_i in enumerate(p):
        # chooses an index by checking whether the generated number falls into a region around
        # that index's probability, where the region is sized based on that index's probability
        rand -= p_i
        if rand < 0:
            return i
    return 0
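# Illustrative draw from the sampler above. `self` is unused in the body, so
# None can stand in for it in a standalone check; low indices should dominate.
from collections import Counter

draws = [get_skewed_random_sample(None, 10) for _ in range(10000)]
print(Counter(draws).most_common(3))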
def plot(p2D, pens, p2De, para, params_exp, thickness):
    plt.subplot(411)
    plt.hist(p2D[:, 5], bins=10, density=True)
    x = np.linspace(0, 5, 80)
    plt.plot(x, maxwell.pdf(x, *para), 'r', x, maxwell.cdf(x, *para), 'g')
    plt.subplot(412)
    plt.hist(pens, bins=20, density=True)
    z = np.linspace(0, 500, 200)
    plt.xlim(0, 2 * thickness)
    plt.plot(z, expon.pdf(z, *params_exp), 'r')
    #plt.plot(z, expon.pdf(z, *params_exp), 'r', z, expon.cdf(z, *params_exp), 'g')
    plt.subplot(413)
    plt.xlim(0, thickness + 1)
    plt.plot(p2D[:, 0], p2D[:, 1], 'b.')
    plt.subplot(414)
    plt.ylim(-1 * 10**6, 1 * 10**6)
    plt.xlim(0, thickness + 1)
    plt.plot(p2De[:, 0], p2De[:, 1], 'b.')
    plt.show()
    return
def truncexponprior_pdf(data, prior, c):
    epsilon = 1e-200
    term1 = prior * (data == 1.0)
    term2 = ((1 - prior)
             * (expon.pdf(data, scale=c, loc=0.0)
                / (expon.cdf(1.0, scale=c, loc=0.0) - expon.cdf(0.0, scale=c, loc=0.0)))
             * (data < 1.0))
    return term1 + term2 + epsilon
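# Normalization check for the truncated-exponential-plus-atom density above
# (a sketch): the continuous mass on [0, 1) integrates to (1 - prior), and the
# atom at 1 contributes `prior`, so the total should be ~1.
from scipy.integrate import quad

prior, c = 0.3, 0.5
cont, _ = quad(lambda t: truncexponprior_pdf(t, prior, c), 0, 1)
print(prior + cont)  # ~1.0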
def displayFits(self):
    """
    generates two histograms on the same plot. One uses maximum likelihood to
    fit the data while the other uses the average time.
    """
    tau = 25.0
    nBins = 400
    size = 100
    taulist = []
    taulistavg = []
    for i in range(1000):
        x = range(nBins)
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size)
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)
        param = sum(timeStamps)/len(timeStamps)
        fit = expon.pdf(x, scale=param)  # the sample mean estimates the scale parameter
        fit *= size
        taulistavg.append(param)
    for i in range(1000):
        x = range(nBins)
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size)
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)
        param = expon.fit(timeStamps)
        fit = expon.pdf(x, loc=param[0], scale=param[1])
        fit *= size
        taulist.append(param[1])
    hist, bins = np.histogram(taulistavg, bins=20, range=(15, 35))
    width = 0.7*(bins[1]-bins[0])
    center = (bins[:-1]+bins[1:])/2
    plt.step(center, hist, where='post', label="averagetime", color='g')
    hist, bins = np.histogram(taulist, bins=20, range=(15, 35))
    width = 0.7*(bins[1]-bins[0])
    center = (bins[:-1]+bins[1:])/2
    plt.step(center, hist, where='post', label="maxlikelihood")
    plt.legend()
    plt.savefig(inspect.stack()[0][3] + ".png")
def observe_intensities(p, intensities, error_model):
    for c in range(len(p.shape) - 1):
        lenc = p.shape[1 + c]
        normidx = array(range(1, lenc))
        pdf_tensor = zeros(lenc)
        pdf_tensor[0] = expon.pdf(intensities[c], 0, 1 / error_model.bg_lambda)
        pdf_tensor[1:lenc] = norm.pdf(intensities[c], error_model.mu * normidx,
                                      error_model.sigma * (normidx**.5))
        # `multiply` is presumably a project-local helper that scales `p` by
        # pdf_tensor along axis 1 + c; numpy's multiply has a different signature.
        multiply(pdf_tensor, p, 1 + c)
def simulate_and_forward_density(distrub, par=None):
    if distrub == 'r':
        insertion_spot = expon.rvs()
        q = expon.pdf(insertion_spot)
    else:
        insertion_spot = uniform.rvs()
        branch_length = par
        q = uniform.pdf(insertion_spot * branch_length, scale=branch_length)
    return insertion_spot, q
def fit_exponential_sp(trace, plot=False):
    loc, scale = expon.fit(trace[:, 4], floc=0)
    if plot:
        xmax = max(trace[:, 4])
        xmin = min(trace[:, 4])
        xdata = np.linspace(xmin, xmax, num=500)
        plt.plot(xdata, expon.pdf(xdata, loc, scale))
        plt.hist(trace[:, 4], bins=50, density=True)
    return loc, scale
def generate_band_table(mu, sigma, gradient, n_species, lam, n_contaminants,
                        library_size=10000):
    """ Generates a band table with normal variables.

    Parameters
    ----------
    mu : pd.Series
        Vector of species optimal positions along gradient.
    sigma : float
        Variance of the species normal distribution.
    gradient : array
        Vector of gradient values.
    n_species : int
        Number of species to simulate.
    lam : float
        Decay constant for contaminant urn (assumes that the contaminant urn
        follows an exponential distribution).
    n_contaminants : int
        Number of contaminant species.

    Returns
    -------
    generator of pd.DataFrame
        Ground truth tables.
    pd.Series
        Metadata group categories, and sample information used for benchmarking.
    pd.Series
        Species actually differentially abundant.
    """
    xs = [norm.pdf(gradient, loc=mu[i], scale=sigma) for i in range(len(mu))]
    table = closure(np.vstack(xs).T)
    x = np.linspace(0, 1, n_contaminants)
    contaminant_urn = closure(expon.pdf(x, scale=lam))
    contaminant_urns = np.repeat(np.expand_dims(contaminant_urn, axis=0),
                                 table.shape[0], axis=0)
    table = np.hstack((table, contaminant_urns))
    s_ids = ['F%d' % i for i in range(n_species)]
    c_ids = ['X%d' % i for i in range(n_contaminants)]
    table = closure(table)
    metadata = pd.DataFrame({'gradient': gradient})
    metadata['n_diff'] = len(mu)
    metadata['n_contaminants'] = n_contaminants
    metadata['library_size'] = library_size
    # back calculate the beta
    metadata['effect_size'] = np.max(mu) / np.max(gradient)
    metadata.index = ['S%d' % i for i in range(len(metadata.index))]
    table = pd.DataFrame(table)
    table.index = ['S%d' % i for i in range(len(table.index))]
    table.columns = s_ids + c_ids
    ground_truth = list(table.columns)[:n_species]
    return table, metadata, ground_truth
def _margin_tail_pdf(self, x, i):
    # density of GP approximation (no need to weight it by p, that's done elsewhere)
    # i = component index
    if self.shapes[i] != 0:
        return gp.pdf(x, c=self.shapes[i], loc=self.u[i], scale=self.scales[i])
    else:
        return expdist.pdf(x, loc=self.u[i], scale=self.scales[i])
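# Why the shape == 0 branch exists: the generalized Pareto density converges to
# the exponential density as the shape parameter goes to 0. The aliases gp and
# expdist are assumed to be scipy.stats.genpareto and scipy.stats.expon, as the
# snippet above suggests.
import numpy as np
from scipy.stats import genpareto as gp, expon as expdist

x = np.linspace(0.1, 5, 50)
print(np.max(np.abs(gp.pdf(x, c=1e-8, scale=2.0) - expdist.pdf(x, scale=2.0))))  # ~0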
def plot_expon_dist(lambda__, ax, num_points=1000):
    # scipy's expon is parameterized by scale = 1/lambda; passing lambda__
    # positionally would be interpreted as the loc parameter instead
    scale = 1.0 / lambda__
    x = np.linspace(expon.ppf(0.01, scale=scale),
                    expon.ppf(0.99, scale=scale), num_points)
    label = r'$Exp(' + str(lambda__) + ')$'
    ax.plot(x, expon.pdf(x, scale=scale), label=label, linewidth=4.0, alpha=0.8)
    ax.set_title(label)
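# The fix above hinges on scipy's parameterization: expon has no rate argument,
# its second positional parameter is `loc`, and the rate enters via
# scale = 1/lambda. Quick identity check:
import numpy as np
from scipy.stats import expon

lam = 2.0
x = np.linspace(0, 3, 10)
assert np.allclose(expon.pdf(x, scale=1/lam), lam * np.exp(-lam * x))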
def get_expon_dist_random(n):
    inv_l = 1.0 / (n**float(-1))  # 1/lambda with the slope fixed at -1
    x = np.array([i for i in range(0, n)])
    p = expon.pdf(x, scale=inv_l)
    rand = np.random.random() * np.sum(p)
    for i, p_i in enumerate(p):
        rand -= p_i
        if rand < 0:
            return i
    return 0
def MLE_plt(categories, inter_arrivals, inter_arrival_means):
    cat_means = cat_mean(inter_arrivals, categories)
    for i in range(0, len(categories)):
        #X = np.asarray(extract_cat_samples(categories.inter_arrivals,categories.categories,i))  # for single inter-arrivals in a category
        #X = np_matrix(categories.categories[i][0])  # for avg(inter-arrival)/person in a category
        data = [0] * len(categories[i][0])
        for j in range(0, len(categories[i][0])):
            data.append(inter_arrival_means[categories[i][0][j]])
        X = np.asarray(data)
        param = expon.fit(X)  # distribution fitting
        sample_mean = cat_means[i]
        #rate_param = 1.0/sample_mean
        #fitted_pdf = expon.pdf(X, scale=1/rate_param)
        # rate_param_estimate = exp_rate_param_estimate(sample_means)
        max_sample = max_interarrival_mean(categories, inter_arrivals, i)
        X_plot = np.linspace(0, 2 * sample_mean, 2000)[:, np.newaxis]
        fitted_pdf = expon.pdf(X_plot, loc=param[0], scale=param[1])  # generate the pdf (fitted distribution)
        #kde = KernelDensity(kernel='gaussian', bandwidth=4).fit(X)
        #KDEs.append(kde)  # to use for prob_return()
        #max_sample = max_interarrival_mean(categories.categories,categories.inter_arrivals,i)
        #X_plot = np.linspace(0,1.5*max_sample,2000)[:, np.newaxis]
        #log_dens = kde.score_samples(X_plot)
        fig = plt.figure()
        #plt.plot(X_plot[:, 0], np.exp(log_dens), '-', label="kernel = '{0}'".format('gaussian'))
        plt.plot(X_plot[:, 0], fitted_pdf, "red", label="Estimated Exponential Dist",
                 linestyle="dashed", linewidth=1.5)
        #plt.draw()
        #plt.pause(0.001)
        plt.title(
            "Parametric MLE (exponential distribution) for category=%s Visitors" % (i))
        plt.hist(X, bins=40, density=True, color="cyan", alpha=.3, label="histogram")  # alpha, from 0 (transparent) to 1 (opaque)
        #plt.hist(combine_inner_lists(extract_cat_samples(categories.inter_arrivals,categories.categories,i)),bins=40,density=True,color="cyan",alpha=.3,label="histogram")
        #plt.hist(np.asarray(categories[i][0]),bins=40,density=True,color="cyan",alpha=.3,label="histogram")
        plt.xlabel("inter-arrival time (days)")
        plt.ylabel("PDF")
        plt.legend()
        save_as = './app/static/img/cat_result/mle/mleplt_cat' + str(i) + '.png'  # dump results into mle folder
        plt.savefig(save_as)
        plt.show(block=False)
        plt.close(fig)
def plot_fig3(self):
    fig = plt.figure(3, figsize=(20, 10), dpi=300)

    # PSD_Y
    sp = fig.add_subplot(221)
    sp.loglog(self.f, self.PSD_Y, 'k.', ms=1)
    sp.loglog(self.f, self.PSD_Y_fit, 'r', lw=2)
    sp.set_xlabel('Frequency (Hz)')
    sp.set_ylabel('PSD_Y [$V^2$ s]')
    if self.axis == 'Y':
        sp.set_title(
            'beta = %d +/- %d [nm/V], k = %.3f +/- %.3f [pN/nm], fc = %d +/- %d [Hz]'
            % (self.beta, self.dbeta, self.kappa, self.dkappa, self.fc_Y, self.dfc_Y))
    else:
        sp.set_title('fc = %d +/- %d (Hz)' % (self.fc_Y, self.dfc_Y))

    # Residual_Y
    sp = fig.add_subplot(222)
    sp.plot(self.f[self.f != self.fd], self.residual_Y, 'k.', ms=1)
    sp.axhline(y=1, color='r', linestyle='solid', linewidth=2)
    sp.set_xlabel('Frequency [Hz]')
    sp.set_ylabel('Normalized PSD_Y (Exp/Fit)')

    sp = fig.add_subplot(223)
    sp.hist(self.residual_Y, bins=20, color='k', histtype='step', density=True)
    y = np.linspace(min(self.residual_Y), max(self.residual_Y), 100)
    sp.plot(y, norm.pdf(y, loc=1, scale=1/(self.N_avg)**0.5), 'r')
    sp.set_yscale('log')
    sp.set_xlabel('Normalized PSD_Y (Exp/Fit)')

    sp = fig.add_subplot(224)
    y = np.linspace(0, max(self.residual_Y0.flatten()), 100)
    sp.hist(self.residual_Y0.flatten(), bins=20, color='k', histtype='step', density=True)
    sp.plot(y, expon.pdf(y), 'r')
    sp.set_yscale('log')
    sp.set_xlabel('Normalized PSD_Y0 (Exp/Fit)')

    # # PSD_XY
    # sp = fig.add_subplot(336)
    # sp.plot(self.f, self.PSD_XY, 'k', lw=1)
    # sp.set_xlabel('Frequency (Hz)')
    # sp.set_ylabel('PSD_XY')

    fig.savefig(os.path.join(self.dir, 'Fig3_PSD_Y.png'))
    plt.close(fig)
def plot_():
    fig, subplot = plt.subplots(1, 1)
    #lambda_ = distribution[1]
    #scale_ = 1 / lambda_
    linspace = np.linspace(0, 10, 1000)
    # reliability (survival) curve; the original nested expon.pdf(...) as the
    # loc argument of expon.cdf, which cannot have been intended
    rel = 1 - expon.cdf(linspace, scale=5)
    print(rel)
    subplot.plot(linspace, rel)
    plt.show()
def approximating_dists(data, bins):
    exp_param = None
    pdf_exp_fitted = None
    try:
        exp_param = expon.fit(data)
    except Exception:
        print("screwed expon fit ")
    #print("params for exponential ", exp_param)
    try:
        # expon has no shape parameters, so the fit result is just (loc, scale)
        pdf_exp_fitted = expon.pdf(bins, loc=exp_param[0], scale=exp_param[1])  # fitted distribution
    except Exception:
        print(" returning as nothing to plot ")
        return
    return [exp_param, pdf_exp_fitted]
def allclayton(x, y, thetaInit=1.4):
    sample = len(x)

    # Convert to normal #
    result = allnorm(x, y)
    u = result["u"]
    v = result["v"]
    sigma = result["sigma"]
    hes_norm = result["hes_norm"]

    # x - mean, y - mean #
    xbar = x - sigma[2]
    ybar = y - sigma[3]

    # Calculate theta #
    data = []
    for i in range(len(u)):
        data.append([u[i][0], v[i][0]])
    data = np.array(data)
    cop = ClaytonCopula(theta=thetaInit)
    cop.fit(data)
    theta = cop.params

    # Save frequent calculations #
    v_pow_minus_theta = v ** (-theta)
    u_pow_minus_theta = u ** (-theta)
    minus_sample = -sample

    # Find logLikelihood of theta #
    cop1 = ((sample * np.log(1 + theta))
            - ((theta + 1) * np.sum(np.log(u * v)))
            - ((np.sum(np.log((u ** (-theta)) + (v ** (-theta)) - 1))) * (2 + (1 / theta)))
            - (0.5 * sample * np.log(2 * pi * (sigma[0] ** 2)))
            - (0.5 * np.sum(xbar ** 2) / (sigma[0] ** 2))
            - (0.5 * sample * np.log(2 * pi * (sigma[1] ** 2)))
            - (0.5 * np.sum(ybar ** 2) / (sigma[1] ** 2)))

    # Calculate hessian of log-copula's density #
    hes_cop = ((minus_sample / ((theta + 1) ** 2))
               - 2 * (theta ** (-3) * np.sum(np.log(u_pow_minus_theta + v_pow_minus_theta - 1)))
               - 2 * (theta ** (-2)) * np.sum(np.divide(
                   np.multiply(np.log(u), u_pow_minus_theta) + np.multiply(np.log(v), v_pow_minus_theta),
                   u_pow_minus_theta + v_pow_minus_theta - 1))
               - (2 + (1 / theta)) * np.sum(np.divide(
                   np.multiply(
                       np.multiply(np.log(u) ** 2, u ** (-theta)) + np.multiply(np.log(v) ** 2, v_pow_minus_theta),
                       u_pow_minus_theta + v_pow_minus_theta - 1)
                   - ((np.multiply(u_pow_minus_theta, np.log(u)) + np.multiply(v_pow_minus_theta, np.log(v))) ** 2),
                   (((u ** (-theta)) + (v ** (-theta)) - 1) ** 2))))

    s = minus_sample / hes_cop
    hes_prior_cop = -1 / (s ** 2)

    # From the second log onwards we pass scale instead of loc.
    log_prior = (np.log(norm.pdf(theta, loc=0, scale=s))
                 + np.log(expon.pdf(sigma[0], scale=1))
                 + np.log(expon.pdf(sigma[1], scale=1)))
    BF = 1

    # Drop the matrix inverse and use -1/ instead.
    BFu = cop1 + log_prior + 0.5 * np.log(-1/(det(hes_norm) * (hes_cop - hes_prior_cop)))
    hes = det(hes_norm) * (hes_cop - hes_prior_cop)

    if theta < 10**(-5):
        theta = 0
        cop1 = 0
        BFu = cop1 + log_prior + 0.5 * np.log(-det(np.matmul(hes_norm, hes_cop - hes_prior_cop)))

    result = {"theta": theta, "cop1": cop1, "hes": hes,
              "hes_prior_cor": hes_prior_cop, "BF": BF, "BFu": BFu}
    return result
def test_point_to_pdf(self, point, n_samples):
    point = gs.to_ndarray(point, 1)
    n_points = point.shape[0]
    pdf = self.space().point_to_pdf(point)
    samples = gs.to_ndarray(self.space().sample(point, n_samples), 1)
    result = gs.squeeze(pdf(samples))
    pdf = []
    for i in range(n_points):
        pdf.append(gs.array([expon.pdf(x, scale=point[i]) for x in samples]))
    expected = gs.squeeze(gs.stack(pdf, axis=0))
    self.assertAllClose(result, expected)
def _pdf(self, value: float):
    """
    Defines the exponential distribution

    :param value: x-value
    :return: Function value at point x
    """
    if self._research_mode:
        return expon.pdf(value, scale=1/self.rate)
    else:
        if value >= 0:
            return self._rate * math.exp(-self._rate * value)
        else:
            return 0
def ow_refpt_algorithm(cmd, world, k1=4.8, k2=3.9):
    x, y = np.mgrid[0:world.xdim:.1, 0:world.ydim:.1]

    # Calculate naive distribution
    naive_dist = naive_algorithm(cmd, world)
    naive_vals = naive_dist.pdf(np.dstack((x, y)))

    # Find distance to closest object
    ref_dists = {}
    for ref in world.references:
        directions = np.array([[1, 0], [-1, 0], [0, 1], [0, -1]])
        ref_pts = np.array([estimate_reference_pt(ref, direction)
                            for direction in directions])
        # np.dstack needs a sequence, not a generator
        possible_dists = np.dstack([np.sqrt((x - pt[0])**2 + (y - pt[1])**2)
                                    for pt in ref_pts])
        ref_dists[ref] = np.min(possible_dists, axis=2)
    #ref_dists = {ref : np.sqrt((x - ref.center[0])**2 + (y - ref.center[1])**2) for ref in world.references}
    min_ref_dists = np.min(np.dstack([ref_dists[ref] for ref in ref_dists]), axis=2)

    # Difference between distance to closest object and object reference in command
    ref_distance_diff = ref_dists[cmd.reference] - min_ref_dists
    ref_distance_vals = expon.pdf(ref_distance_diff, scale=k1)

    # Find distance to nearest wall
    min_wall_dists = np.min(np.dstack((x, y, world.xdim - x, world.ydim - y)), axis=2)

    # Difference between distance to closest wall and object reference in command
    wall_distance_diff = ref_dists[cmd.reference] - min_wall_dists
    wall_distance_diff[wall_distance_diff < 0] = 0
    # this line must stay active: wall_distance_vals is used below
    wall_distance_vals = expon.pdf(wall_distance_diff, scale=k2)

    mean_prob = naive_vals*ref_distance_vals*wall_distance_vals
    loc = np.where(mean_prob == mean_prob.max())
    mean = 0.1*np.array([loc[0][0], loc[1][0]])
    mv_dist = multivariate_normal(mean, naive_dist.cov)
    return mv_dist
def exp_statistics(data, bins):
    # Sentinel defaults; the original's bare `exp_param[-1,-1]` style lines
    # appear to have been intended as error-code initializers.
    exp_param = [-1, -1]
    pdf_exp_fitted = [-1]
    try:
        exp_param = expon.fit(data)
    except Exception:
        exp_param = [-2, -2]
    try:
        pdf_exp_fitted = expon.pdf(bins, loc=exp_param[0], scale=exp_param[1])  # fitted distribution
    except Exception:
        pass
    return [exp_param, pdf_exp_fitted]
def inferPosterior(self, state, action, prior='uniform'):
    """
    Uses inference engine to compute posterior probability from the
    likelihood and prior (beta distribution).
    """
    if prior == 'beta':
        # Beta Distribution
        self.prior = np.linspace(.01, 1.0, 101)
        self.prior = beta.pdf(self.prior, 1.4, 1.4)
        self.prior /= self.prior.sum()
    elif prior == 'shiftExponential':
        # Shifted Exponential
        self.prior = np.zeros(101)
        for i in range(50):
            self.prior[i + 50] = i * .02
        self.prior[100] = 1.0
        self.prior = expon.pdf(self.prior)
        self.prior[0:51] = 0
        self.prior *= self.prior
        self.prior /= self.prior.sum()
    elif prior == 'shiftBeta':
        # Shifted Beta
        self.prior = np.linspace(.01, 1.0, 101)
        self.prior = beta.pdf(self.prior, 1.2, 1.2)
        self.prior /= self.prior.sum()
        self.prior[0:51] = 0
    elif prior == 'uniform':
        # Uniform
        self.prior = np.zeros(len(self.sims))
        self.prior = uniform.pdf(self.prior)
        self.prior /= self.prior.sum()
    self.posterior = self.likelihood * self.prior
    self.posterior /= self.posterior.sum()
def objects_algorithm(cmd, world):
    x, y = np.mgrid[0:world.xdim:.1, 0:world.ydim:.1]

    # Calculate naive distribution
    naive_dist = naive_algorithm(cmd, world)
    naive_vals = naive_dist.pdf(np.dstack((x, y)))

    # Find distance to closest object
    ref_dists = {ref: np.sqrt((x - ref.center[0])**2 + (y - ref.center[1])**2)
                 for ref in world.references}
    # np.dstack needs a sequence, not a generator
    min_dists = np.min(np.dstack([ref_dists[ref] for ref in ref_dists]), axis=2)

    # Difference between distance to closest object and object reference in command
    distance_diff = ref_dists[cmd.reference] - min_dists
    exp_vals = expon.pdf(distance_diff, scale=0.7)

    vals = naive_vals*exp_vals
    loc = np.where(vals == vals.max())
    mean = 0.1*np.array([loc[0][0], loc[1][0]])
    mv_dist = multivariate_normal(mean, naive_dist.cov)
    return mv_dist
def _plot_correct_weights():
    # draw data from an exponential distribution
    from scipy.stats import expon
    import pylab as P

    scale = 1.0
    data = expon.rvs(scale=scale, size=8000)
    bandwidth = 20/np.sqrt(data.shape[0])
    range = np.array([0, 7])
    n = 500
    y = np.linspace(range[0], range[1], num=n)
    eps = 1e-5

    ## Use corrected samples
    q = _correct_weights(data, bandwidth, range, filter=False)
    target_densities1 = figtree(data, y, q, bandwidth, epsilon=eps, eval="auto", verbose=True)

    # now try again with uncorrected densities
    q = np.ones(data.shape)
    target_densities2 = figtree(data, y, q, bandwidth, epsilon=eps, eval="auto", verbose=True)

    print("Smallest sample at %g" % min(data))

    # plot the exponential density with max. likelihood estimate of the scale
    P.plot(y, expon.pdf(y, scale=np.mean(data)))
    P.plot(y, target_densities1, 'ro')
    P.title("Gaussian Kernel Density Estimation")
    # P.show()
    P.savefig("KDE_50000_h-0.05.pdf")
def analysis_quality(data, timestamp, **options):
    data_pop = data.segments[0].spiketrains
    g = open("%s.json" % BENCHMARK_NAME, 'r')
    d = json.load(g)
    N = d['param']['N']
    max_rate = d['param']['max_rate']
    delta_rate = max_rate/N
    tstop = d['param']['tstop']
    mean_intervals = 1000.0/numpy.linspace(delta_rate, max_rate, N)
    isi_distributions = []
    for spiketrain in data_pop:
        isi_distributions.append(isi(spiketrain))
        print(spiketrain.annotations)
    if options['plot_figure']:
        # zip() returns an iterator in Python 3, so materialize it before slicing
        pairs = list(zip(isi_distributions, mean_intervals))
        for i, (distr, expected_mean_interval) in enumerate(pairs[:8]):
            plt.subplot(4, 2, i + 1)
            counts, bins, _ = plt.hist(distr, bins=50)
            emi = expected_mean_interval
            plt.plot(bins,
                     emi * distr.size * (numpy.exp(-bins[0]/emi) - numpy.exp(-bins[1]/emi))
                     * expon.pdf(bins, scale=emi),
                     'r-')
        plt.savefig("results/%s/spike_train_statistics.png" % timestamp)
    p_values = numpy.zeros((N,))
    for i, (distr, expected_mean_interval) in enumerate(zip(isi_distributions, mean_intervals)):
        D, p = kstest(distr, "expon",
                      args=(0, expected_mean_interval),  # args are (loc, scale)
                      alternative='two-sided')
        p_values[i] = p
        print(expected_mean_interval, distr.mean(), D, p, distr.size)
    # Should we use the D statistic or the p-value as the benchmark?
    # note that D --> 0 is better, p --> 1 is better (but p > 0.01 should be ok, I guess?)
    # D is less variable, but depends on N.
    # taking the minimum p-value means we're more likely to get a false "significantly different" result.
    return {'type': 'quality', 'name': 'kolmogorov_smirnov',
            'measure': 'min-p-value', 'value': p_values.min()}
# an algorithm which takes as an argument any probability density function
# (not necessarily normalized) and returns a collection of samples from the
# normalized distribution
import numpy as np
import pylab as plt
import scipy
from scipy.stats import norm
from scipy.stats import expon

gauss = lambda x: 10*norm.pdf(x)
sumgauss = lambda x: 10*norm.pdf(x) + 15*norm.pdf(x, loc=20, scale=4)
exp = lambda x: 10*expon.pdf(x)
exgauss = lambda x: 10*expon.pdf(x) + 15*norm.pdf(x, loc=20, scale=4)

def metrop(dist, n=1000, full_out=False):
    #TODO look up how to initialize markov-chain?
    init = 10
    sigma = 1
    samples = []
    accepted = 0.
    logdis = lambda x: np.log(dist(x))
    count = 0
    while True:
        x_prime = np.random.normal(init, 5)
        a = dist(x_prime)/dist(init)
        # --- the original snippet breaks off here; what follows is a
        # completion sketch of the standard Metropolis acceptance step ---
        if a >= 1 or np.random.rand() < a:
            init = x_prime
            accepted += 1
        samples.append(init)
        count += 1
        if count >= n:
            break
    if full_out:
        return np.array(samples), accepted / n
    return np.array(samples)
# This fragment begins at the second subplot, so `fig` must already exist in
# the original script; a plain figure is assumed here.
fig = plt.figure()

x = np.arange(-3, 3, 0.001)
sp2 = fig.add_subplot(322)
sp2.plot(x, norm.pdf(x))
sp2.set_title('Normal')

mu = 5.0
sigma = 2.0
values = np.random.normal(mu, sigma, 10000)
sp3 = fig.add_subplot(323)
sp3.hist(values, 50)
sp3.set_title('Normal (random)')

x = np.arange(0, 10, 0.001)
sp4 = fig.add_subplot(324)
sp4.plot(x, expon.pdf(x))
sp4.set_title('Exponential ("Power Law")')

n, p = 10, 0.5
x = np.arange(0, 10, 0.001)
sp5 = fig.add_subplot(325)
sp5.plot(x, binom.pmf(x, n, p))
sp5.set_title('Binomial')

mu = 500
x = np.arange(400, 600, 0.5)
sp6 = fig.add_subplot(326)
sp6.plot(x, poisson.pmf(x, mu))
sp6.set_title('Poisson')

plt.show()
def idf_pow(word, parl_counter, tot_counter, doc_counter, counter_list_parl, b1, b2):
    h_max = math.log2(len(doc_counter))
    h_word = TfIdf.entropy(word, tot_counter, doc_counter)
    x = math.pow(2, h_word)/math.pow(2, h_max)
    return (expon.pdf(h_word, scale=0.2)
            * TfIdf.parl_prob(word, parl_counter, doc_counter)
            * beta.pdf(x, b1, b2))
from scipy.stats import norm, expon

# Part 2. Generate 1000 random numbers with an exponential distribution, plot the
# histogram, and compare against the known PDF of that distribution. Then compute
# 1000 sums of 1000 exponentially distributed random numbers and compare (fit)
# against a normal distribution, verifying the central limit theorem.

n = []
for i in range(1000):
    # fill the list "n" with exponentially distributed random numbers with mean 10
    n.append(np.random.exponential(10))

# get the "scale" and "loc" parameters from a fit to the data in "n";
# in this case the mean of the distribution equals "scale"
loc1, scale1 = expon.fit(n)
print(scale1, loc1)  # print these parameters

x = np.linspace(0, 50, 100)
y = expon.pdf(x, scale=scale1, loc=loc1)  # an exponential distribution with mean 10
f, fig1 = plt.subplots(1, 1)
fig1.plot(x, y, 'r-', lw=5, alpha=0.6, label='expon pdf')  # plot x vs y (the distribution)
fig1.hist(n, bins=50, density=True)  # histogram of n; it is important that it is normalized
f.savefig('graficas.png')  # save the plots to a file

# So far we have verified that the data do follow the given distribution. Now we
# repeat the process with a variable that is the sum of the generated variables.
sumas = []  # each element stores the sum of 1000 exponentially distributed random variables
for i in range(1000):
    suma = 0
    for j in range(1000):
        suma += np.random.exponential(10)
    sumas.append(suma)

# --- the original snippet breaks off above; a completion sketch of the normal
# fit described in the opening comment ---
mu, sigma = norm.fit(sumas)
x2 = np.linspace(min(sumas), max(sumas), 100)
f2, fig2 = plt.subplots(1, 1)
fig2.hist(sumas, bins=50, density=True)
fig2.plot(x2, norm.pdf(x2, mu, sigma), 'r-', lw=3, label='normal fit')
f2.savefig('graficas_sumas.png')
def likely_life(life_samp, theta):
    p = np.empty(NSAMP, dtype=float)
    for i in range(NSAMP):  # `xrange` in the original (Python 2)
        p[i] = expon.pdf(life_samp[i], scale=1/theta)
    return np.prod(p)
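# A product of hundreds of densities underflows double precision; summing
# log-densities is the numerically safe equivalent (a sketch under the same
# assumptions: life_samp is a 1-D array and theta > 0).
import numpy as np
from scipy.stats import expon

def log_likely_life(life_samp, theta):
    return np.sum(expon.logpdf(life_samp, scale=1/theta))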
values = np.random.uniform(-10, 10, 100000)
plt.hist(values, 50)
plt.show()

# creates a range from -3 to 3 in increments of 0.001
x = np.arange(-3, 3, 0.001)

# plots a graph of the probability density function
plt.plot(x, norm.pdf(x))

mu = 5
sigma = 2
values = np.random.normal(mu, sigma, 10000)
plt.hist(values, 50)
plt.show()

# creates exponential probability density function
x = np.arange(0, 10, 0.001)
plt.plot(x, expon.pdf(x))

# creates binomial probability mass function
n, p = 10, 0.5
x = np.arange(0, 10, 0.001)
plt.plot(x, binom.pmf(x, n, p))

# creates poisson probability mass function
# gets the odds of 500 NOT happening
mu = 500
x = np.arange(400, 600, 0.5)
plt.plot(x, poisson.pmf(x, mu))
def expon2(x, gamma):
    '''Exponential PDF'''
    return expon.pdf(x, loc=0, scale=gamma)
def signal_variability(data, subplots=False, title=None, density_limits=(-20, 0),
                       threshold_level=10):
    import h5py
    if type(data) == h5py._hl.dataset.Dataset:
        title = data.file.filename + data.name
        data = data[:, :]
    from numpy import histogram, log, arange, sign
    import matplotlib.pyplot as plt
    plt.figure()
    # plt.figure(1)
    if subplots:
        rows = subplots[0]
        columns = subplots[1]
        channelNum = 0
    else:
        rows = 1
        columns = 1
        channelNum = arange(data.shape[0])
    for row in range(rows):
        for column in range(columns):
            if type(channelNum) == int and channelNum >= data.shape[0]:
                continue
            print("Calculating Channel " + str(channelNum))
            if type(channelNum) == int:
                ax = plt.subplot(rows, columns, channelNum + 1)
            else:
                ax = plt.subplot(rows, columns, 1)
            d = data[channelNum, :]
            dmean = d.mean()
            dstd = d.std()
            ye, xe = histogram(d, bins=100, density=True)  # `normed` was removed from numpy
            if (sign(d) > 0).all():
                from scipy.stats import expon
                expon_parameters = expon.fit(d)
                yf = expon.pdf(xe[1:], *expon_parameters)
                # left_threshold, right_threshold = likelihood_threshold(d, threshold_level, comparison_distribution='expon', comparison_parameters=expon_parameters)
                left_threshold = 0
                right_threshold = 0
            else:
                from scipy.stats import norm
                yf = norm.pdf(xe[1:], dmean, dstd)
                left_threshold, right_threshold = likelihood_threshold(
                    d, threshold_level, comparison_distribution='norm',
                    comparison_parameters=(dmean, dstd))
            x = (xe[1:] - dmean)/dstd
            ax.plot(x, log(ye), 'b-', x, log(yf), 'r-')
            # ax.set_ylabel('Density')
            # ax.set_xlabel('STD')
            if rows != 1 or columns != 1:
                ax.set_title(str(channelNum))
                ax.set_yticklabels([])
                ax.set_xticklabels([])
            if density_limits:
                ax.set_ylim(density_limits)
            if (sign(d) > 0).all():
                ax.plot(((right_threshold - dmean)/dstd, (right_threshold - dmean)/dstd), plt.ylim())
            else:
                ax.plot(((left_threshold - dmean)/dstd, (left_threshold - dmean)/dstd), plt.ylim())
                ax.plot(((right_threshold - dmean)/dstd, (right_threshold - dmean)/dstd), plt.ylim())
            channelNum += 1
    if title:
        plt.suptitle(title)
def __init__(self, evaluator, suddenness, numChanges, args, dim, noise=0):
    evaluator.numEnv = int(args[0])
    if noise == 0 or evaluator.numEnv == 0:
        y = [0]*1000
        if evaluator.numEnv == 0:
            lenIter = 1000
        else:
            lenIter = 2
    else:
        x = np.linspace(0.0, 100., num=101)
        tt = expon.pdf(x, scale=noise, loc=0)
        tt = tt/np.sum(tt)
        if evaluator.numEnv == 2:
            lenIter = 200
        else:
            lenIter = 50
        y = []
        for i, t in enumerate(tt):
            y += [int(x[i])]*int(lenIter*2*t)
    evaluator.noise = y

    costArr = ['0', '0', '0.01', '0.03', '0.1']
    cost = costArr[suddenness]

    if evaluator.numEnv == 0:
        a = float(args[args.find("0Env_")+5] + "." + args[args.find("0Env_")+6:-2])
        j = 1.5/a
        np.random.seed(2+0)
        x = np.linspace(0.01, 100., num=101)
        tt = gamma.pdf(x, a, scale=j, loc=0)
        tt = tt/np.sum(tt)
        y = []
        for i, t in enumerate(tt):
            y += [int(11*x[i])]*int(1000*t)
        evaluator.env = np.random.choice([int(_) for _ in y], size=len(y), replace=False)
        print(set(evaluator.env))
        evaluator.trajectory = dict()
        i = 0
        for s in range(len(evaluator.env)):
            i += int(10000/numChanges)
            evaluator.trajectory[i] = s

    if evaluator.numEnv == 1:
        s = int(args[args.find("0Env_")+6:-2])
        print(s)
        evaluator.env = [s, s]
        if 1:
            evaluator.trajectory = dict()
            evaluator.trajectory[1000] = 0
    elif evaluator.numEnv == 2:
        evaluator.env = [0, 100]
        if args[-4] == 'A': x2 = 0.999999    # 1000000
        elif args[-4] == 'B': x2 = 0.999998  # 1000000
        elif args[-4] == 'C': x2 = 0.999995  # 1000000
        elif args[-4] == 'E': x2 = 0.99999   # 100000
        elif args[-4] == 'F': x2 = 0.99998   # 50000
        elif args[-4] == 'G': x2 = 0.99995   # 20000
        elif args[-4] == 'V': x2 = 0.9999    # 10000
        elif args[-4] == 'W': x2 = 0.9998    # 5000
        elif args[-4] == 'X': x2 = 0.9995    # 2000
        elif args[-4] == 'H': x2 = 0.999     # 1000
        elif args[-4] == 'I': x2 = 0.9960    # 80 # 500
        elif args[-4] == 't': x2 = 0.9958    # 79 # 400
        elif args[-4] == 'j': x2 = 0.9956    # 78 # 333
        elif args[-4] == 'k': x2 = 0.9954    # 77 # 434
        elif args[-4] == 's': x2 = 0.9952    # 76 # 434
        elif args[-4] == 'm': x2 = 0.9950    # 75 # 434
        elif args[-4] == 'n': x2 = 0.9948    # 74 # 434
        #elif args[-4] == 'I': x2 = 0.9980   # 56 # 80 # 500
        #elif args[-4] == 't': x2 = 0.9979   # 54 # 79 # 400
        ##elif args[-4] == 'j': x2 = 0.9978  # 52 # 78 # 333
        #elif args[-4] == 'k': x2 = 0.9977   # 50 # 77 # 434
        #elif args[-4] == 's': x2 = 0.9976   # 48 # 76 # 434
        #elif args[-4] == 'm': x2 = 0.9975   # 46 # 75 # 434
        #elif args[-4] == 'n': x2 = 0.9974   # 44 # 74 # 434
        elif args[-4] == 'o': x2 = 0.9973    # 434
        elif args[-4] == 'p': x2 = 0.9972    # 434
        elif args[-4] == 'q': x2 = 0.9971    # 434
        elif args[-4] == 'r': x2 = 0.997     # 434
        elif args[-4] == 'J': x2 = 0.995     # 200
        elif args[-4] == 'L': x2 = 0.99      # 100
        if args[-3] == 'V': x3 = 0.9999
        elif args[-3] == 'H': x3 = 0.999
        elif args[-3] == 'L': x3 = 0.99
        elif args[-3] == 'A': x3 = 0.999999  # 1000000
        if args[-6] == 'P':
            evaluator.trajectory = dict()
            s = 1
            i = 0
            while len(evaluator.trajectory) < lenIter:
                if s == 0:  # v5 (Very low freq in High stress)
                    i += int(np.ceil(1000.*1./(1-x2)/numChanges))
                else:
                    i += int(np.ceil(1000.*1./(1-x3)/numChanges))
                s = (s-1)*(-1)
                evaluator.trajectory[i] = s
    elif evaluator.numEnv == 3:
        evaluator.env = [0, 11, 100]
        if args[-5] == 'A': x1 = 0.999999    # 1000000
        elif args[-5] == 'E': x1 = 0.99999   # 100000
        elif args[-5] == 'V': x1 = 0.9999    # 10000
        elif args[-5] == 'H': x1 = 0.999     # 1000
        elif args[-5] == 'L': x1 = 0.99      # 100
        if args[-4] == 'A': x2 = 0.999999    # 1000000
        elif args[-4] == 'E': x2 = 0.99999   # 100000
        elif args[-4] == 'V': x2 = 0.9999    # 10000
        elif args[-4] == 'H': x2 = 0.999     # 1000
        elif args[-4] == 'L': x2 = 0.99      # 100
        if args[-3] == 'H':
            x3 = 0.999
        if args[-7] == 'P':  # Regular
            evaluator.trajectory = dict()
            envOrder = [0, 1, 0, 2]
            s = 1
            i = 0
            while len(evaluator.trajectory) < 2*lenIter:
                if envOrder[s % 4] == 1:
                    i += int(np.ceil(1./(1-x2)/numChanges))
                elif envOrder[s % 4] == 2:
                    i += int(np.ceil(1./(1-x3)/numChanges))
                else:
                    i += int(0.5*np.ceil(1./(1-x1)/numChanges))
                s += 1
                evaluator.trajectory[i] = envOrder[s % 4]

    if args[-2] == 'S':
        evaluator.arrayCost = []
        evaluator.arrayCost.append(np.loadtxt('allCostsSt_S'+'0'+'.txt'))
        evaluator.arrayCost.append(np.loadtxt('allCostsSt_S'+cost+'.txt'))
        evaluator.selection = 1
    elif args[-2] == 'W':
        evaluator.arrayCost = []
        evaluator.arrayCost.append(np.loadtxt('allCostsSt_W'+'0'+'.txt'))
        evaluator.arrayCost.append(np.loadtxt('allCostsSt_W'+cost+'.txt'))
        evaluator.selection = 0
    else:
        raise ValueError("Finish with SS or WS")

    evaluator.optVal = [evaluator.arrayCost[1][:, i].argmax() for i in range(101)]
    evaluator.gamma1Env = np.loadtxt("gamma1EnvOptimum.txt")

    ## Global variables
    evaluator.sud = suddenness
    evaluator.trajectoryX = evaluator.trajectory
    evaluator.trajectory = sorted([_ for _ in evaluator.trajectory])
    print(evaluator.trajectoryX)
# `ax`, `crashIntervals`, `scale`, and `log` come from earlier in the original
# script; this fragment starts at the plotting stage.
ax.hist(crashIntervals, bins=10)
plt.title("Histogram")
plt.xlabel("Interval")
plt.ylabel("Frequency")

# We draw an exponential pdf in the figure.
# http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.expon.html
x = np.linspace(0, 1800, 20)
# This 6500 is manually tuned. We should have an automatic normalization.
# TODO: http://scikit-learn.org/stable/modules/preprocessing.html
y = expon.pdf(x, 0, scale) * 6500
ax.plot(x, y, 'r-', lw=5, alpha=0.6, label='expon pdf')
plt.show()

log.close()
print("We now do some theoretical analysis.")
print("[WIP] lambda: ", 1/mean) # do the distribution fitting dance dist = getattr(scipy.stats, "expon") param = dist.fit(traces) lambda_, mean = dist.fit(traces) print("mean (2. versuch): ", mean) print("lambda (2. versuch): ", lambda_) print param size = len(traces) #pdf_fitted = dist.pdf(scipy.arange(size), *param[:-2], loc=param[-2], scale=param[-1]) * size #pdf_fitted = dist.pdf(scipy.arange(size), scale=param[-1]) * size pdf_fitted = dist.pdf(scipy.arange(size), scale=size) *size print pdf_fitted # Set up the matplotlib figure f, ax = plt.subplots(figsize=(8, 6)) ax.set_ylabel("# of occurrences") ax.set_xlabel("requested size (in kB)") # plot trace values sns.distplot(traces, kde=False, rug=False) # plot expon(lambda_) for comparison sns.plt.plot(pdf_fitted, label='exponential') sns.plt.plot(lambda_, expon.pdf(lambda_), 'r-', lw=5, alpha=0.6, label='foo') sns.plt.legend(loc='upper right') sns.plt.show()