def mse_exp(theoretical_distribution, estimated_distribution):
    theoretical_lambda = theoretical_distribution[1]
    theoretical_scale = 1 / theoretical_lambda
    estimated_lambda = estimated_distribution[1]
    estimated_scale = 1 / estimated_lambda
    linspace = np.linspace(expon.ppf(0.001, scale=theoretical_scale),
                           expon.ppf(0.999, scale=theoretical_scale), 1000)

    theoretical_pdf = expon.pdf(linspace, scale=theoretical_scale)
    estimated_pdf = expon.pdf(linspace, scale=estimated_scale)
    mse_pdf = mean_squared_error(theoretical_pdf, estimated_pdf)

    theoretical_cdf = expon.cdf(linspace, scale=theoretical_scale)
    estimated_cdf = expon.cdf(linspace, scale=estimated_scale)
    mse_cdf = mean_squared_error(theoretical_cdf, estimated_cdf)

    theoretical_reliability = 1 - expon.cdf(linspace, scale=theoretical_scale)
    estimated_reliability = 1 - expon.cdf(linspace, scale=estimated_scale)
    mse_reliability = mean_squared_error(theoretical_reliability, estimated_reliability)

    return [mse_pdf, mse_cdf, mse_reliability]
def DecayBoot():
    days = 100
    b = []
    for i in range(1, days):
        b.extend([1 - expon.cdf(i, scale=100) for _ in range(390)])
    a = [1 - expon.cdf(j, scale=100) + 0.003
         for j in np.linspace(1, days, len(b))]

    fig, ax = plt.subplots(1, 2, figsize=(18, 6))
    for nr, axs in enumerate(ax):
        if nr == 0:
            axs.plot(b, label='Discrete Exponential Decay', color=c[1], lw=1)
            axs.plot(a, label='Strictly Exponential Decay', color=c[2], lw=1)
            axs.set_ylabel('Relative probability of selecting observation')
            axs.set_xlabel('Days since observation')
            axs.set_ylim(-0.05, 1.05)
            axs.set_xlim(-2500, len(b) + 2500)
            axs.set_xticks(range(0, len(b) + 3900, 3900))
            axs.set_xticklabels(range(0, 110, 10))
        else:
            axs.plot(b, label='Discrete Exponential Decay', color=c[1])
            axs.plot(a, label='Strictly Exponential Decay', color=c[2])
            axs.set_xlim(-250, 3900 + 250)
            axs.set_ylim(0.87, 1.01)
            axs.set_xticks(range(0, 3900 + 390, 390))
            axs.set_xticklabels(range(0, 11, 1))
    plt.legend(loc='best')
    plt.savefig('Graphs/ExponDecay.pdf', bbox_inches='tight')
    plt.tight_layout()
    plt.show()
def func_2b11(repeat_times, sample_number):
    # Importance sampling with an exponential proposal distribution.
    result = [0] * repeat_times
    result_mean = 0
    result_variance = 0
    lamda = 1.0 / (np.log(function1(0.8) / function1(1.8)))
    envelope_size = expon.cdf(3, loc=0, scale=lamda) - expon.cdf(0.8, loc=0, scale=lamda)
    for i in range(0, repeat_times):
        for j in range(0, sample_number):
            x = rd.uniform(0.8, 3)
            result[i] += (function1(x) * envelope_size) / (
                expon.pdf(x, loc=0, scale=lamda) * sample_number)
        result_mean += result[i] / repeat_times
    for i in range(0, repeat_times):
        result_variance += (result[i] - result_mean) ** 2
    result_variance /= repeat_times
    print("The Variance of the 50 samples is ", result_variance)
    print("The average of this", repeat_times, "samples is: ", result_mean)
    plt.scatter(np.arange(0, repeat_times), result)
    plt.title("Function 1 with Monte Carlo Estimation Imported with Importance Sampling")
    plt.xlabel("Trial")
    plt.ylabel("Estimation Result")
    plt.grid(True)
    plt.show()
    print("\n\n\n\n")
def fit_censored_data(s, t, x_cen, censor):
    scale0 = Exponential.fit_censored_data(s, censor)
    scale1 = Exponential.fit_censored_data(t, censor)
    censor0_prob = 1 - expon.cdf(censor, loc=0, scale=scale0)
    censor1_prob = 1 - expon.cdf(censor, loc=0, scale=scale1)
    u = (len(x_cen) / (len(s) + len(t) + len(x_cen)) - censor1_prob) \
        / (censor0_prob - censor1_prob)
    return 1 / scale0, 1 / scale1, u
def truncexponprior_pdf(data, prior, c):
    epsilon = 1e-200
    term1 = prior * (data == 1.0)
    term2 = (1 - prior) * (expon.pdf(data, scale=c, loc=0.0) /
                           (expon.cdf(1.0, scale=c, loc=0.0) -
                            expon.cdf(0.0, scale=c, loc=0.0))) * (data < 1.0)
    return term1 + term2 + epsilon
def predict(self, next_n_predict):
    if not self.has_spike:
        predictions = [self.data[-1]] * next_n_predict
    else:
        predictions = []
        for diff in range(next_n_predict):
            since_latest_spike = len(self.data) - self.spike[-1] + diff
            if since_latest_spike <= self.avg_decline_length:
                decline_step = self.avg_decline_length - since_latest_spike
                if self.decline_strategy == "exponential":
                    pred = self.decline_alpha ** decline_step
                elif self.decline_strategy == "expectation":
                    pred = expon.cdf(0, -decline_step, self.last_spike_height)
                elif self.decline_strategy == "linear":
                    pred = self.decline_k * decline_step
                else:
                    raise Exception("unknown decline strategy: %s" % self.decline_strategy)
            else:
                rise_step = since_latest_spike - self.avg_decline_length
                confidence = expon.cdf(0, -rise_step, self.expon_params[1])
                if self.height_limit == "average":
                    limit = self.avg_spike_height
                elif "max_" in self.height_limit:
                    n = int(self.height_limit.split("_")[1])
                    limit = max(self.spike_height[-n:])
                else:
                    raise Exception("unknown height limit: %s" % self.height_limit)
                if math.log(limit) < rise_step * math.log(self.rise_alpha):
                    pred_eia = limit
                else:
                    pred_eia = self.rise_alpha ** rise_step
                pred_ee = confidence * self.avg_spike_height
                pred_li = min(limit, self.rise_k * rise_step)
                if self.rise_strategy == "exponential":
                    pred = pred_eia
                elif self.rise_strategy == "expectation":
                    pred = pred_ee
                elif self.rise_strategy == "linear":
                    pred = pred_li
                elif self.rise_strategy == "auto":
                    if confidence < self.confidence_threshold:
                        pred = pred_ee
                    else:
                        pred = max(pred_eia, pred_ee)
                else:
                    raise Exception("unknown rise strategy: %s" % self.rise_strategy)
            predictions.append(pred.real)
    return self.round_non_negative_int_func(predictions)
def ivt_expon(lam, a=0, b=inf, n_samples=1):
    """Generate random samples from an exponential distribution with rate
    `lam`, truncated to the interval [a, b], via inverse transform sampling.
    """
    a_update = expon.cdf(a, scale=1 / lam)  # convert the bounds to uniform (CDF) space
    b_update = expon.cdf(b, scale=1 / lam)
    # Draw uniformly over [F(a), F(b)] in the transformed space
    rv_unif = uniform.rvs(loc=a_update, scale=(b_update - a_update), size=n_samples)
    # Invert the exponential CDF to map the draws back onto [a, b]
    rv_exp = (-1 / lam) * np.log(1 - rv_unif)
    return rv_exp
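# Minimal usage sketch for ivt_expon (illustrative, not from the original source;
# assumes numpy as np and scipy.stats expon/uniform are imported as above):
# draw truncated-exponential samples and confirm they stay within the bounds.
samples = ivt_expon(2.0, a=0.5, b=3.0, n_samples=10000)
print(samples.min(), samples.max())  # both values should lie within [0.5, 3.0]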
def bar_graph(dist, mu):
    figure, ax = plt.subplots(1, 1)
    # Histogram
    n = len(dist)
    minim = min(dist)
    maxim = max(dist)
    count_of_interval = round(1.72 * (n ** (1 / 3)))
    x = (maxim - minim) / float(count_of_interval)
    h = [0] * count_of_interval
    for i in range(n):
        for j in range(1, count_of_interval):
            if x * j >= dist[i] >= x * (j - 1):
                h[j - 1] += 1
                break
    hi = [step for step in arange(minim, maxim + x, x)]
    e = [(expon.cdf(hi[i], loc=0, scale=mu) -
          expon.cdf(hi[i - 1], loc=0, scale=mu)) * n
         for i in range(1, len(hi))]
    hi_square = chisquare(h[:min(len(h), len(e))], e[:min(len(h), len(e))], ddof=1)
    for j in range(5):
        for i in range(len(h)):
            if i >= len(h):
                break
            if h[i] <= 5:
                h[i - 1] += h[i]
                del h[i]
                count_of_interval -= 1
    i = 0
    print(count_of_interval)
    for j in range(1, count_of_interval + 1):
        print(x * (j - 1), x * j)
    # print(h)
    # print(e)
    ax.hist(dist, density=True, bins=count_of_interval, edgecolor='black')
    f_exp = [1 / mu * exp(-value_x / mu) for value_x in range(0, int(max(dist)))]
    ax.plot(range(0, int(max(dist))), f_exp, c='#f3a870')
    # ax.text(count_of_interval - count_of_interval / 2, 1 / float(mu) - 1 / float(mu) / 2,
    #         'hi2 = ' + str(round(hi_square[0], 2)) + ' < ' + str(round(chi2.ppf(0.95, df=count_of_interval - 1), 2)),
    #         fontsize=16, c='#f3a870')
    # print(round(hi_square[0], 2), round(chi2.ppf(0.95, df=count_of_interval - 1), 2))
    plt.show()
def uniform_rescaled_ISIs(conditional_intensity, is_spike,
                          adjust_for_short_trials=True):
    '''Rescales the interspike intervals (ISIs) to unit rate Poisson,
    adjusts for short time intervals, and transforms the ISIs to a
    uniform distribution for easier analysis.

    Parameters
    ----------
    conditional_intensity : ndarray, shape (n_time,)
        The fitted model mean response rate at each time.
    is_spike : bool ndarray, shape (n_time,)
        Whether or not the neuron has spiked at that time.
    adjust_for_short_trials : bool, optional
        If the trials are short and the neuron does not spike often, then
        the interspike intervals can be longer than the trial. In this
        situation, the interspike interval is censored. If
        `adjust_for_short_trials` is True, we take this censoring into
        account using the adjustment in [1].

    Returns
    -------
    uniform_rescaled_ISIs : ndarray, shape (n_spikes,)

    References
    ----------
    .. [1] Wiener, M.C. (2003). An adjustment to the time-rescaling method
           for application to short-trial spike train data. Neural
           Computation 15, 2565-2576.
    '''
    try:
        integrated_conditional_intensity = integrate.cumulative_trapezoid(
            conditional_intensity, initial=0.0)
    except AttributeError:
        # Older versions of scipy
        integrated_conditional_intensity = integrate.cumtrapz(
            conditional_intensity, initial=0.0)

    rescaled_ISIs = _rescaled_ISIs(integrated_conditional_intensity, is_spike)

    if adjust_for_short_trials:
        max_transformed_interval = expon.cdf(
            _max_transformed_interval(integrated_conditional_intensity,
                                      is_spike, rescaled_ISIs))
    else:
        max_transformed_interval = 1

    return expon.cdf(rescaled_ISIs) / max_transformed_interval
def cdf(self, x: float):
    """Find the CDF for a certain x value.

    Args:
        x (float): The value for which the CDF is needed.
    """
    return expon.cdf(x, scale=self.scale)
def expon_dcdf(x, d, scale=1):
    """
    d-th derivative of the cumulative distribution function at x of the given RV.

    :param x: array_like
        quantiles
    :param d: non-negative integer
        derivative order of the cumulative distribution function
    :param scale: positive number
        scale parameter (default=1)
    :return: array_like
        If d = 0: the cumulative distribution function evaluated at x
        If d = 1: the probability density function evaluated at x
        If d >= 2: the (d-1)-th derivative of the density evaluated at x
    """
    if d < 0 or not isinstance(d, int):
        print("d must be a non-negative integer.")
        return float('nan')
    if d == 0:
        output = expon.cdf(x, scale=scale)
    if d >= 1:
        output = ((-1 / scale) ** (d - 1)) * expon.pdf(x, scale=scale)
    return output
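# Minimal usage sketch for expon_dcdf (illustrative; assumes numpy as np and
# scipy.stats.expon are imported as above): d=1 should reproduce the pdf, and
# d=2 should match a central finite difference of the pdf.
xs = np.linspace(0.1, 5.0, 50)
assert np.allclose(expon_dcdf(xs, 1, scale=2.0), expon.pdf(xs, scale=2.0))
eps = 1e-6
fd = (expon.pdf(xs + eps, scale=2.0) - expon.pdf(xs - eps, scale=2.0)) / (2 * eps)
assert np.allclose(expon_dcdf(xs, 2, scale=2.0), fd, atol=1e-5)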
def bootstrapExpDecayGraph(data, nIterations):
    d1 = data.index[-1]
    d1 = datetime.datetime(d1.year, d1.month, d1.day)
    shapeval = 10
    daysSince = [(d1 - j).days + 1 for j in data.index]
    probDist = np.array([1 - expon.cdf(j, scale=shapeval) for j in np.unique(daysSince)])
    probDist /= np.sum(probDist)
    probDist = np.cumsum(sorted(probDist, reverse=True))
    minsPerDay = data.resample("d").count().values[:, 0]
    utilizedLags = int(391 - minsPerDay[0])
    bootstrapLength = 391 + utilizedLags
    print(data)
    exit()  # debugging stop left in the original
    data = np.insert(data.values, 0, np.empty_like(data.iloc[:utilizedLags, :]), axis=0)
    data = data.reshape((len(data) // 391, 391, data.shape[1]))
    uninumbers = np.random.uniform(size=(bootstrapLength, nIterations))
    a = np.array([np.digitize(uninumbers[_], probDist) for _ in range(bootstrapLength)])
    print(a.shape)
    exit()  # debugging stop left in the original
    b = np.array([[random.choice(data[-i, :, :]) for i in a[:, q]]
                  for q in range(nIterations)])
    return b
def __init__(self, id):
    self._id = id
    self._rnd = np.random
    self._rnd.seed(self._id)
    self._healthState = HealthStat.NO_UTI
    self._probUTI = expon.cdf(3 * Data.DELTA_T)
    self._countUTIs = 0
    self._probpyelonphritis = Data.PROB_PYELO
    self._countnopyelonephritis = 0
    self._proburinalysis = Data.PROB_URINALYSIS
    self._counturinalysis = 0
    self._countnourinalysis = 0
    self._probUTIdiagnosed = Data.PROB_UTI_DIAGNOSED
    self._countUTIdiagnosis = 0
    self._noUTIdiagnosis = 0
    self._probUTIcured = Data.PROB_UTI_CURED
    self._countUTIcured = 0
    self._countUTInotcured = 0
    self._probpersistantinfection = Data.PROB_PERSISTANT_INFECTION  # inverse of prob of pyelonephritis
    self._countpersistantinfection = 0
    self._countpyelonephritis = 0
    self._probmodifiedantibiotics = Data.PROB_MODIFIED_ANTIBIOTICS
    self._countmodifiedantibiotics = 0
    self._countextendedtreatment = 0
    self._probinpatienttreatment = Data.PROB_INPATIENT  # inverse of prob of outpatient treatment
    self._countinpatienttreatment = 0
    self._countoutpatienttreatment = 0
    self._probSTIorvaginitis = Data.PROB_STI_OR_VAG
    self._countSTIorvaginitis = 0
    self._countnodisorderpresent = 0
    self._extended_treatment_cost = 0
    self._inpatient_treatment_cost = 0
    self._outpatient_treatment_cost = 0
def healthMonitor(self):
    if len(self.hb_intervals) > 1:
        avg_hb_int = mean(self.hb_intervals)
        # P(still alive) = P(an exponentially distributed heartbeat interval,
        # with mean avg_hb_int, exceeds the time elapsed since the last heartbeat)
        p_of_life = 1 - expon.cdf(time(), loc=self.last_hb, scale=avg_hb_int)
        return p_of_life
    else:
        return 1
def testExpon1(seed):
    # Check that exponential distribution is parameterized correctly
    ripl = get_ripl(seed=seed)
    ripl.assume("a", "(expon 4.0)", label="pid")
    observed = collectSamples(ripl, "pid")
    expon_cdf = lambda x: expon.cdf(x, scale=1. / 4)
    return reportKnownContinuous(expon_cdf, observed)
def __init__(self, id):
    self._id = id
    self._rnd = np.random
    self._rnd.seed(self._id)
    self._healthState = HealthStat.NO_UTI
    self._probUTI = expon.cdf(3 * Data.DELTA_T)
    self._countUTIs = 0
    self._probpyelonphritis = 0.04
    self._countnopyelonephritis = 0
    self._proburinalysis = 0.769
    self._counturinalysis = 0
    self._countnourinalysis = 0
    self._probUTIdiagnosed = 0.8481
    self._countUTIdiagnosis = 0
    self._noUTIdiagnosis = 0
    self._probUTIcured = 0.94
    self._countUTIcured = 0
    self._countUTInotcured = 0
    self._probpersistantinfection = 0.96  # inverse of prob of pyelonephritis
    self._countpersistantinfection = 0
    self._countpyelonephritis = 0
    self._probmodifiedantibiotics = 0.75
    self._countmodifiedantibiotics = 0
    self._countextendedtreatment = 0
    self._probinpatienttreatment = 0.2  # inverse of prob of outpatient treatment
    self._countinpatienttreatment = 0
    self._countoutpatienttreatment = 0
    self._probSTIorvaginitis = 0.291
    self._countSTIorvaginitis = 0
    self._countnodisorderpresent = 0
    self._extended_treatment_cost = 0
    self._inpatient_treatment_cost = 0
    self._outpatient_treatment_cost = 0
def calc_prob(R, eps, C, k, N, scale):
    """Calculates the probability that GMRES converges in fewer than R
    iterations when the L^\infty norm of the difference is exp(scale)."""
    if R >= N:
        total_prob = 1.0
    else:
        def G_single(x):
            return G(x, eps, C, k, N)

        def G_single_R(y):
            return G_single(y) - R

        # Find the point at which we revert to the worst-case GMRES estimate
        endpoint = 1.0 / (C * k)
        # Find the point at which the gradient is zero, and therefore the
        # maximum on the part alpha < 1. One can calculate this by hand.
        gradpoint = 1.0 / (3.0 * C * k)
        total_prob = 0.0
        if G_single(gradpoint) < R:
            # integrate [0, end]
            total_prob += expon.cdf(endpoint, scale=scale)
        else:
            if G_single(0.0) < R:
                lower_point = bisect(G_single_R, 0.0, gradpoint)
                # integrate [0, lower_point]
                total_prob += expon.cdf(lower_point, scale=scale)
            nearly_end = endpoint - 10.0 ** -10.0
            if G_single(nearly_end) < R:
                higher_point = bisect(G_single_R, gradpoint, nearly_end)
                # integrate [higher_point, end]
                total_prob += (expon.cdf(endpoint, scale=scale) -
                               expon.cdf(higher_point, scale=scale))
    return total_prob
def _update_infection(self):
    """
    Update the infection dynamics for one time increment [t, t+dt].
    """
    # Do nothing if no infected (to speed up simulation)
    if np.sum(self.I) < 1:
        return

    S = self.S
    I = self.I
    R = self.R

    # Home force of infection
    I_ij_sumj = I.sum(axis=1)
    N_ij_sumj = S.sum(axis=1) + I.sum(axis=1) + R.sum(axis=1)
    lambda_home = 0.5 * self.beta * I_ij_sumj / N_ij_sumj

    # Work force of infection
    I_ji_sumj = I.sum(axis=0)
    N_ji_sumj = S.sum(axis=0) + I.sum(axis=0) + R.sum(axis=0)
    lambda_work = 0.5 * self.beta * I_ji_sumj / N_ji_sumj

    M = self.S.shape[0]  # number of subpopulations
    for i in range(M):
        # Normal infection rate
        if self.quarantine_mode in [None, 'isolation']:
            lambda_home_eff = lambda_home[i]
            lambda_work_eff = lambda_work
        # Distancing scenario: modify transmission rate linearly with kappa
        elif self.quarantine_mode == 'distancing':
            lambda_home_eff = self.kappa[i, :] * lambda_home[i]
            lambda_work_eff = self.kappa[:, i] * lambda_work

        # Calculate infections (home force of infection)
        dSI_i = binom.rvs(S[i], expon.cdf((lambda_home_eff + lambda_work_eff) * self.dt))
        # Calculate recoveries
        dIR_i = binom.rvs(I[i], expon.cdf(self.mu * self.dt))

        # Update system
        S[i] = S[i] - dSI_i
        I[i] = I[i] + dSI_i - dIR_i
        R[i] = R[i] + dIR_i

    self.S = S
    self.I = I
    self.R = R
def kolmogorov(alpha):
    """
    :param alpha: the scale parameter
    :return: line about accepting or rejecting a hypothesis
    """
    # sample of size n from an exponential distribution
    arr = np.random.exponential(scale=1 / alpha, size=n)
    arr = np.sort(arr)  # order statistic
    k = np.array(range(1, len(arr) + 1))
    D = np.maximum(expon.cdf(x=arr, loc=0, scale=1) - (k - 1) / len(arr),
                   k / len(arr) - expon.cdf(x=arr, loc=0, scale=1)).max()
    if np.sqrt(len(arr)) * D < kolmogi(gamma):
        return f'D = {D:0.4f}. \nThe statistical data do NOT CONFLICT with the H0 hypothesis.\n'
    else:
        return f'D = {D:0.4f}. \nThe statistical data do CONFLICT with the H0 hypothesis.\n'
def _margin_tail_cdf(self, x, i):
    # CDF of GP approximation (no need to weight it by p, that's done elsewhere)
    # i = component index
    if self.shapes[i] != 0:
        return gp.cdf(x, c=self.shapes[i], loc=self.u[i], scale=self.scales[i])
    else:
        return expdist.cdf(x, loc=self.u[i], scale=self.scales[i])
def func_2b1(repeat_times, sample_number):
    # Use the pdf of an exponential distribution to determine the importance
    # of each interval (stratified sampling).
    # Determine the parameter for the exponential pdf
    lamda = 1.0 / (np.log(function1(0.8) / function1(1.8)))
    sample_allocation = [0] * 10
    for i in range(0, 10):
        x1 = expon.cdf(0.22 * i + 0.8, loc=0, scale=lamda)
        x2 = expon.cdf(0.22 * i + 1.02, loc=0, scale=lamda)
        sample_allocation[i] = sample_number * (x2 - x1)
    condition = expon.cdf(3, loc=0, scale=lamda) - expon.cdf(0.8, loc=0, scale=lamda)
    sum_allocation = 0
    for i in range(0, 9):
        sample_allocation[9 - i] = int(sample_allocation[9 - i] / condition)
        sum_allocation += sample_allocation[9 - i]
    sample_allocation[0] = sample_number - sum_allocation
    print("The allocation of Sample Numbers Derived from Exponential pdf with Lamda=",
          lamda, "is:\n")
    print(sample_allocation)
    result = [0] * repeat_times
    result_mean = 0
    result_variance = 0
    for i in range(0, repeat_times):
        for j in range(0, 10):
            for k in range(0, sample_allocation[j]):
                result[i] += 0.22 * function1(
                    rd.uniform(0.22 * j + 0.8, 0.22 * j + 1.02)) / sample_allocation[j]
        result_mean += result[i] / repeat_times
    for i in range(0, repeat_times):
        result_variance += (result[i] - result_mean) ** 2
    result_variance /= repeat_times
    print("The Variance of the 50 samples is ", result_variance)
    print("The average of this", repeat_times, "samples is: ", result_mean)
    plt.scatter(np.arange(0, repeat_times), result)
    plt.title("Function 1 with Monte Carlo Estimation Imported with stratification")
    plt.xlabel("Trial")
    plt.ylabel("Estimation Result")
    plt.grid(True)
    plt.show()
    print("\n\n\n\n")
def plot_():
    fig, subplot = plt.subplots(1, 1)
    # lambda_ = distribution[1]
    # scale_ = 1 / lambda_
    linspace = np.linspace(0, 10, 1000)
    rel = 1 - expon.cdf(linspace, expon.pdf(linspace, scale=5))
    print(rel)
    subplot.plot(linspace, rel)
    plt.show()
def expon_test_mq():
    λ, λs, nof_arrivals = 6, [4, 3], 10000
    runner = MQueueATMSimulator(λ, λs, nof_arrivals)
    runner.run()
    ws, w̅, s = runner.yield_waiting_time_results()
    nof_sim = 1000
    nof_samples = len(ws)
    d0 = D([expon.cdf(w) for w in ws])
    p_value = kolmogorov_smirnov(nof_sim, nof_samples, d0)
    print(f'Observed value d0 ≅ {d0}')
    print(f'p_value ≅ {p_value}')
def predict(self, next_n):
    if not self.params:
        pred = [0] * next_n
    elif self.fit_model == "Sampling":
        pred = self.generate_samples(self.params, next_n, self.time_since_last_spike,
                                     self.spike_width_avg, self.spike_max)
    elif self.time_since_last_spike == 0:
        return [self.last] * next_n
    elif self.fit_model == "Weibull":
        pred = exponweib.cdf([x for x in range(next_n)], a=self.params[0],
                             c=self.params[1], loc=-self.time_since_last_spike,
                             scale=self.params[3]) * self.spike_avg
    else:  # self.fit_model == "Expon"
        pred = expon.cdf([x for x in range(next_n)],
                         -self.time_since_last_spike, self.params[1]) * self.spike_avg
    return self.round_non_negative_int_func(pred)
def ExponBoot2(data):
    d1 = datetime.datetime(2013, 3, 1)
    shapeval = 100
    data = data[datetime.datetime(2013, 3, 1) - datetime.timedelta(50):'20130228']
    data['days_since'] = [(d1 - j).days + 1 for j in data.index]
    data['days_since_2'] = [1 - expon.cdf((d1 - j).days + 1, scale=shapeval)
                            for j in data.index]
    data['days_since_2'] /= np.sum(data['days_since_2'])
    data['obs_since'] = [len(data) - j + 1 for j in range(len(data))]
    data['obs_since_2'] = [1 - expon.cdf((len(data) - j + 1) / 10000, scale=shapeval)
                           for j in range(len(data))]
    data = data[::-1]
    fig, ax = plt.subplots(1, 2, figsize=(20, 8))
    ax = ax.ravel()
    ax[0].plot(range(len(data)), data['obs_since_2'])
    ax[0].set_yticklabels('')
    ax[0].set_ylabel('Probability of being extracted in bootstrapping procedure')
    ax[0].set_xlabel('Observations Since')
    plt.xticks(range(len(data))[::1300])
    ax[1].plot(range(len(data)), data['days_since_2'])
    ax[1].set_yticklabels('')
    ax[1].set_xlabel('Days Since')
    plt.xticks(range(len(data))[::1300], data['days_since'][::1300])
    plt.savefig('Graphs/ExponDecay.pdf', bbox_inches='tight')
    plt.tight_layout()
    plt.show()
def _cdf(self, value: float):
    """
    Defines the cumulative exponential distribution function

    :param value: x-value
    :return: Function value at point x
    """
    if self._research_mode:
        return expon.cdf(value, scale=1 / self.rate)
    else:
        if value >= 0:
            return 1 - math.exp(-self._rate * value)
        else:
            return 0
def _get_exponential_relative_log_likelihoods(tmin, tmax, scales):
    total_lls = np.zeros(scales.shape)
    interval_t = tmin != tmax
    exact_t = tmin == tmax
    for i in range(len(scales)):
        # Compute likelihood.
        scale = scales[i]
        interval_log_likelihoods = np.log(
            expon.cdf(tmax[interval_t], scale=scale) -
            expon.cdf(tmin[interval_t], scale=scale))
        exact_log_likelihoods = np.log(expon.pdf(tmin[exact_t], scale=scale))
        total_log_likelihood = interval_log_likelihoods.sum() + exact_log_likelihoods.sum()
        total_lls[i] = total_log_likelihood
    max_ll = max(total_lls)
    relative_likelihoods = np.exp(total_lls - max_ll)
    return relative_likelihoods
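# Minimal usage sketch for _get_exponential_relative_log_likelihoods (illustrative;
# assumes numpy as np and scipy.stats.expon are imported as above): profile a few
# candidate scales against a mix of interval-censored and exactly observed times.
tmin = np.array([1.0, 2.0, 0.5, 3.0])
tmax = np.array([1.5, 2.0, 0.8, 3.0])   # rows where tmin == tmax are exact observations
scales = np.array([0.5, 1.0, 2.0, 4.0])
rel = _get_exponential_relative_log_likelihoods(tmin, tmax, scales)
print(scales[np.argmax(rel)])  # candidate scale with the highest relative likelihood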
def smirnov(alpha):
    """
    Compute the Kolmogorov-Smirnov statistic on 2 samples.

    :param alpha: the scale parameter
    :return: line about accepting or rejecting a hypothesis
    """
    sample_1 = np.sort(np.random.exponential(scale=1, size=n))
    sample_2 = np.sort(np.random.exponential(scale=1 / alpha, size=int(n / 2)))
    k = np.array(range(1, len(sample_2) + 1))
    D = np.maximum(expon.cdf(x=sample_2, loc=0, scale=1) - (k - 1) / len(sample_2),
                   k / len(sample_2) - expon.cdf(x=sample_2, loc=0, scale=1)).max()
    criteria = kolmogi(gamma) * np.sqrt((1 / n) + (1 / (n / 2)))
    if D < criteria:
        return (f'D = {D:0.4f}, criteria = {criteria:0.4f}. \n'
                f'The statistical data do NOT CONFLICT with the H0 hypothesis.')
    else:
        return (f'D = {D:0.4f}, criteria = {criteria:0.4f}. \n'
                f'The statistical data do CONFLICT with the H0 hypothesis.')
def _cdf(self, x, w, lambda_, mu, sigma):
    """
    The distribution's CDF function.

    :param x: np.array of point values
    :param w: the weight parameter. The exponential is multiplied by w and
        the gaussian by (1 - w)
    :param lambda_: The exponential distribution's parameter
    :param mu: The gaussian's mean parameter
    :param sigma: The gaussian's standard deviation parameter
    :return: The cdf values as an np.array the same shape as x
    """
    return w * expon.cdf(x, scale=1 / lambda_) \
        + (1 - w) * norm.cdf(x, loc=mu, scale=sigma)
def dtruncExp(params):
    """
    Mirrors the dtruncExp truncated exponential distribution function in
    MGDrive-Kernels.cpp

    x is the place in the support of the density function (the support of the
    normal is the whole real line, the poisson is the nonnegative integers)
    r is 1/scale = rate param
    a is the lower truncation bound
    b is the upper truncation bound
    """
    x, r, a, b = params
    loc = 0
    if a >= b:
        return "argument a is greater than or equal to b"
    scale = 1.0 / r
    Ga = expon.cdf(a, loc, scale)
    Gb = expon.cdf(b, loc, scale)
    if approxEqual(Ga, Gb):
        print("Truncation interval is not inside the domain of the density function")
    # Truncated density: f(x) / (G(b) - G(a))
    density = expon.pdf(x, loc, scale) / (Gb - Ga)
    print("density is ", density)
    return density
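# Minimal usage sketch for dtruncExp (illustrative; assumes the `approxEqual`
# helper used by the function is available and numpy/scipy are imported as above):
# the truncated density should integrate to roughly 1 over [a, b].
xs = np.linspace(0.5, 4.0, 1001)
dens = dtruncExp([xs, 1.5, 0.5, 4.0])
print(dens.mean() * (4.0 - 0.5))  # Riemann-sum approximation, should be close to 1.0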
def test_expon(self):
    from scipy.stats import expon
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1)
    mean, var, skew, kurt = expon.stats(moments='mvsk')
    x = np.linspace(expon.ppf(0.01), expon.ppf(0.99), 100)
    ax.plot(x, expon.pdf(x), 'r-', lw=5, alpha=0.6, label='expon pdf')
    rv = expon()
    ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
    vals = expon.ppf([0.001, 0.5, 0.999])
    np.allclose([0.001, 0.5, 0.999], expon.cdf(vals))
    self.assertEqual(str(ax), "AxesSubplot(0.125,0.11;0.775x0.77)")
def get_residuals(s, tau_fit, offset=0.5):
    """Returns residuals of sample `s` CDF vs an exponential CDF.

    Arguments:
        s (array of floats): sample
        tau_fit (float): mean waiting-time of the exponential distribution
            to use as reference
        offset (float): Default 0.5. Offset to add to the empirical CDF.
            See :func:`get_ecdf` for details.

    Returns:
        residuals (array): residuals of empirical CDF compared with
        analytical CDF with time constant `tau_fit`.
    """
    x, y = get_ecdf(s, offset=offset)
    ye = expon.cdf(x, scale=tau_fit)
    residuals = y - ye
    return x, residuals
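# Minimal usage sketch for get_residuals (illustrative; assumes the `get_ecdf`
# helper referenced in the docstring is available and numpy/scipy are imported
# as above): residuals should be small when tau_fit matches the true constant.
s = np.random.exponential(scale=2.0, size=5000)
x, res = get_residuals(s, tau_fit=2.0)
print(np.abs(res).max())  # typically well below 0.05 for 5000 samples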
def expected_gain_given_exponential_expiration(options, sc, max_samples):
    ev_high = expected_value(options['H'])
    ev_low = expected_value(options['L'])
    ev_random = 0.5 * ev_high + 0.5 * ev_low
    p = np.zeros(max_samples, float)
    eg = np.zeros(max_samples, float)
    for trial in range(max_samples):
        # get cumulative probability according to the exponential
        if trial == 0:
            p_exp_cum = 0.
        else:
            p_exp_cum = 1 - expon.cdf(max_samples - trial, loc=0, scale=sc)
        pH = prob_choose_H_all_allocations(options, trial + 1)
        p[trial] = (1 - p_exp_cum) * pH + p_exp_cum * 0.5
        eg[trial] = (1 - p_exp_cum) * (pH * ev_high + (1 - pH) * ev_low) + p_exp_cum * ev_random
    return p, eg
def bootstrapExpDecay(data, nIterations):
    d1 = data.index[-1]
    d1 = datetime.datetime(d1.year, d1.month, d1.day)
    shapeval = 100
    daysSince = [(d1 - j).days + 1 for j in data.index]
    probDist = np.array([1 - expon.cdf(j, scale=shapeval) for j in np.unique(daysSince)])
    probDist /= np.sum(probDist)
    probDist = np.cumsum(sorted(probDist, reverse=True))
    minsPerDay = data.resample("d").count().values[:, 0]
    utilizedLags = int(391 - minsPerDay[0])
    bootstrapLength = 391 + utilizedLags
    data = np.insert(data.values, 0, np.zeros_like(data.iloc[:utilizedLags, :]), axis=0)
    data = data.reshape((len(data) // 391, 391, data.shape[1]))
    uninumbers = np.random.uniform(size=(bootstrapLength, nIterations))
    a = np.array([np.digitize(uninumbers[_], probDist) for _ in range(bootstrapLength)])
    b = np.array([[random.choice(data[-i, :, :]) for i in a[:, q]]
                  for q in range(nIterations)])
    return b
from scipy.stats import expon

print(expon.cdf(2, 0, 6))
def features_to_gaussian(header, row, limits):
    # Does this look like a mean-variance feature file?
    if len(header) == 3:
        mean = None
        if 'mean' in header:
            mean = float(row[header.index('mean')])
        if 'mode' in header:
            mean = float(row[header.index('mode')])
        if .5 in header:
            mean = float(row[header.index(.5)])
        if mean is None:
            return None

        if 'var' in header:
            var = float(row[header.index('var')])
        elif 'sdev' in header:
            var = float(row[header.index('sdev')]) * float(row[header.index('sdev')])
        else:
            return None

        if np.isnan(var) or var == 0:
            return SplineModelConditional.make_single(mean, mean, [])

        # This might be uniform
        if mean - 2 * var < limits[0] or mean + 2 * var > limits[1]:
            return None

        return SplineModelConditional.make_gaussian(limits[0], limits[1], mean, var)
    elif len(header) == 4:
        # Does this look like a mean and evenly spaced p-values?
        header = header[1:]  # Make a copy of the list
        row = row[1:]
        mean = None
        if 'mean' in header:
            mean = float(row.pop(header.index('mean')))
            header.remove('mean')
        elif 'mode' in header:
            mean = float(row.pop(header.index('mode')))
            header.remove('mode')
        elif .5 in header:
            mean = float(row.pop(header.index(.5)))
            header.remove(.5)
        else:
            return None

        # Check that the two other values are evenly spaced p-values
        row = list(map(float, row[0:2]))
        if np.all(np.isnan(row)):
            return SplineModelConditional.make_single(mean, mean, [])

        if header[1] == 1 - header[0] and abs(row[1] - mean - (mean - row[0])) < abs(row[1] - row[0]) / 1000.0:
            lowp = min(header)
            lowv = np.array(row)[np.array(header) == lowp][0]
            if lowv == mean:
                return SplineModelConditional.make_single(mean, mean, [])
            lowerbound = 1e-4 * (mean - lowv)
            upperbound = np.sqrt((mean - lowv) / lowp)
            sdev = brentq(lambda sdev: norm.cdf(lowv, mean, sdev) - lowp, lowerbound, upperbound)
            if float(limits[0]) < mean - 3 * sdev and float(limits[1]) > mean + 3 * sdev:
                return SplineModelConditional.make_gaussian(limits[0], limits[1], mean, sdev * sdev)
            else:
                return None
        else:
            # Heuristic best curve: known tails, fit to mean
            lowp = min(header)
            lowv = np.array(row)[np.array(header) == lowp][0]
            lowerbound = 1e-4 * (mean - lowv)
            upperbound = np.log((mean - lowv) / lowp)
            low_sdev = brentq(lambda sdev: norm.cdf(lowv, mean, sdev) - lowp, lowerbound, upperbound)
            if float(limits[0]) > mean - 3 * low_sdev:
                return None
            low_segment = SplineModelConditional.make_gaussian(float(limits[0]), lowv, mean, low_sdev * low_sdev)

            highp = max(header)
            highv = np.array(row)[np.array(header) == highp][0]
            lowerbound = 1e-4 * (highv - mean)
            upperbound = np.log((highv - mean) / (1 - highp))
            high_scale = brentq(lambda scale: .5 + expon.cdf(highv, mean, scale) / 2 - highp, lowerbound, upperbound)
            if float(limits[1]) < mean + 3 * high_scale:
                return None
            # Construct exponential, starting at mean, with full cdf of .5
            high_segment = SplineModelConditional.make_single(
                highv, float(limits[1]),
                [np.log(1 / high_scale) + np.log(.5) + mean / high_scale, -1 / high_scale])

            sevenys = np.linspace(lowv, highv, 7)
            ys = np.append(sevenys[0:2], [mean, sevenys[-2], sevenys[-1]])
            lps0 = norm.logpdf(ys[0:2], mean, low_sdev)
            lps1 = expon.logpdf([ys[-2], ys[-1]], mean, high_scale) + np.log(.5)
            # bounds = [norm.logpdf(mean, mean, low_sdev), norm.logpdf(mean, mean, high_sdev)]
            result = minimize(
                lambda lpmean: FeaturesInterpreter.skew_gaussian_evaluate(
                    ys, np.append(np.append(lps0, [lpmean]), lps1),
                    low_segment, high_segment, mean, lowp, highp),
                .5, method='Nelder-Mead')
            print(np.append(np.append(lps0, result.x), lps1))
            return FeaturesInterpreter.skew_gaussian_construct(
                ys, np.append(np.append(lps0, result.x), lps1), low_segment, high_segment)
# add a description of the genotype allele
hgdp_snp_idx = 0
hgdp_snp_pos = int(posCommon[hgdp_snp_idx])
n_sample = len(laiList)
n_topmed_snp = len(posListTopmed)
first_hgdp_snp_pos = int(posCommon[0])
last_hgdp_snp_pos = int(posCommon[-1])
f.writelines("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + "\t".join(idList) + "\n")
for h in range(n_topmed_snp):
    if h % 100000 == 0:
        print(str(h) + "/" + str(n_topmed_snp) + " SNPs done")
    topmed_snp_pos = int(posListTopmed[h][1])
    if topmed_snp_pos < first_hgdp_snp_pos:
        gtList = []
        # need to apply a weight to the closest HGDP marker
        # p(recom=FALSE); mean length is 5MB
        w = 1 - expon.cdf(first_hgdp_snp_pos - topmed_snp_pos, scale=5000000)
        for i in range(n_sample):
            # print(str(i) + " " + str(h))
            # if (first_hgdp_snp_pos - topmed_snp_pos) > 5000000:  # use 5MB as cut off
            #     result = gaiListGT[i]
            # else:
            #     result = laiListGT[i][0]
            result = convertGT_LI(gaiListGT[i], laiListGT[i][0], w)
            # result = [gaiListGT[i][j]*w + (1-w)*laiListGT[i][0] for j in range(7)]
            gtList.append(convertGT_2_LAI(result) + ":" + ",".join(result))
        f.writelines("\t".join(posListTopmed[h]) + "\t.\t.\t.\t.\tEDGE\t.\tCOMB:COMB_DOSAGE\t" +
                     "\t".join(gtList) + "\n")
        # continue
    if topmed_snp_pos > last_hgdp_snp_pos:
        gtList = []