def crpsClimoCSGD(shape, obs, mean, pop):
    """Average CRPS of a climatological CSGD as a function of its shape.

    ``pop`` and ``mean`` are held fixed; the scale and shift of the censored,
    shifted gamma distribution are derived from them and from ``shape``.
    Amounts below 0.254 (mm, per the original note) are treated as zero.

    Args:
        shape: Gamma shape parameter under evaluation.
        obs: 1-D numeric array of observed amounts.
        mean: Climatological mean used to solve for the scale.
        pop: Probability mass above the censoring point; ``1 - pop`` is used
            as the CDF value at the censoring point.

    Returns:
        ``scale * mean(crps) + penalty`` where ``penalty`` is positive only
        when ``shape * scale + shift`` falls below 0.005.
    """
    censor_level = 0.254
    scores = np.zeros(len(obs), dtype='float64')

    # Standardised censoring point and the derived scale / shift.
    cdf_at_censor = 1. - pop
    censor_std = gamma.ppf(cdf_at_censor, shape)
    pdf_k1_at_censor = gamma.pdf(censor_std, shape + 1., scale=1.)
    scale = (mean - censor_level * pop) / (
        shape * (pop + pdf_k1_at_censor) - pop * censor_std)
    shift = censor_level - censor_std * scale

    # Penalise shifts that would move most of the PDF below zero.
    penalty = max(0.005 - shape * scale - shift, 0.0)

    # Pieces shared by the "zero" and "positive" CRPS expressions.
    beta_half = beta(0.5, shape + 0.5)
    cdf_k1_at_censor = gamma.cdf(censor_std, shape + 1, scale=1)
    cdf_2k_at_2censor = gamma.cdf(2 * censor_std, 2 * shape, scale=1)
    cdf_sq = np.square(cdf_at_censor)
    censor_term = censor_std * cdf_sq
    beta_term = (shape / float(math.pi)) * beta_half * (1. - cdf_2k_at_2censor)

    is_zero = np.less(obs, censor_level)
    is_positive = np.greater_equal(obs, censor_level)

    obs_std = (obs[is_positive] - shift) / scale
    cdf_k_at_obs = gamma.cdf(obs_std, shape, scale=1)
    cdf_k1_at_obs = gamma.cdf(obs_std, shape + 1, scale=1)

    scores[is_zero] = censor_std * (2. * cdf_at_censor - 1.) - censor_term \
        + shape * (1. + 2. * cdf_at_censor * cdf_k1_at_censor
                   - cdf_sq - 2 * cdf_k1_at_censor) \
        - beta_term
    scores[is_positive] = obs_std * (2. * cdf_k_at_obs - 1.) - censor_term \
        + shape * (1. + 2. * cdf_at_censor * cdf_k1_at_censor
                   - cdf_sq - 2 * cdf_k1_at_obs) \
        - beta_term

    return scale * ma.mean(scores) + penalty
def gamma_correction(obs_data, mod_data, sce_data, lower_limit=0.1, cdf_threshold=0.9999999):
    """Gamma-distribution based correction of ``sce_data``.

    Three gamma distributions are fitted to the values ``>= lower_limit`` of
    the observed, modelled and scenario series.  Their CDFs are combined into
    an adapted CDF, which is mapped back through the fitted distributions and
    written into an array the length of ``sce_data``.

    Args:
        obs_data: Observed series (indexable with a boolean mask, has
            ``.shape``) -- presumably a NumPy array or pandas Series.
        mod_data: Modelled series for the reference period.
        sce_data: Modelled series to be corrected.
        lower_limit: Values below this are not treated as "rain days".
        cdf_threshold: Cap applied to every CDF so ``1 / (1 - cdf)`` stays
            finite.

    Returns:
        1-D ``numpy.ndarray`` of length ``len(sce_data)``; entries that are
        not assigned a corrected value stay 0.
    """
    # Keep only the values at or above the wet-day threshold.
    obs_raindays, mod_raindays, sce_raindays = [ x[x >= lower_limit] for x in [obs_data, mod_data, sce_data] ]
    # gamma.fit returns (shape, loc, scale); loc is left free here.
    obs_gamma, mod_gamma, sce_gamma = [ gamma.fit(x) for x in [obs_raindays, mod_raindays, sce_raindays] ]
    # CDF values of the sorted wet-day samples under their own fitted gamma.
    obs_cdf = gamma.cdf(np.sort(obs_raindays), *obs_gamma)
    mod_cdf = gamma.cdf(np.sort(mod_raindays), *mod_gamma)
    sce_cdf = gamma.cdf(np.sort(sce_raindays), *sce_gamma)
    # Cap the CDFs so the 1 / (1 - cdf) terms below cannot divide by zero.
    obs_cdf[obs_cdf > cdf_threshold] = cdf_threshold
    mod_cdf[mod_cdf > cdf_threshold] = cdf_threshold
    sce_cdf[sce_cdf > cdf_threshold] = cdf_threshold
    # Resample the obs / mod CDFs onto as many points as there are scenario
    # wet days so the three arrays can be combined element-wise.
    obs_cdf_intpol = np.interp( np.linspace(1, len(obs_raindays), len(sce_raindays)), np.linspace(1, len(obs_raindays), len(obs_raindays)), obs_cdf)
    mod_cdf_intpol = np.interp( np.linspace(1, len(mod_raindays), len(sce_raindays)), np.linspace(1, len(mod_raindays), len(mod_raindays)), mod_cdf)
    obs_inverse, mod_inverse, sce_inverse = [ 1. / (1. - x) for x in [obs_cdf_intpol, mod_cdf_intpol, sce_cdf] ]
    adapted_cdf = 1 - 1. / (obs_inverse * sce_inverse / mod_inverse)
    adapted_cdf[adapted_cdf < 0.] = 0.
    # Quantiles of the adapted CDF under the obs fit, rescaled by the ratio of
    # the scenario-fit and model-fit quantiles at the scenario CDF values.
    initial = gamma.ppf(np.sort(adapted_cdf), *obs_gamma) * gamma.ppf( sce_cdf, *sce_gamma) / gamma.ppf(sce_cdf, *mod_gamma)
    # Wet-day frequencies; obs_frequency is computed but not used below.
    obs_frequency = 1. * obs_raindays.shape[0] / obs_data.shape[0]
    mod_frequency = 1. * mod_raindays.shape[0] / mod_data.shape[0]
    sce_frequency = 1. * sce_raindays.shape[0] / sce_data.shape[0]
    days_min = len(sce_raindays) * sce_frequency / mod_frequency
    expected_sce_raindays = int(min(days_min, len(sce_data)))
    sce_argsort = np.argsort(sce_data)
    correction = np.zeros(len(sce_data))
    if len(sce_raindays) > expected_sce_raindays:
        # Too many wet days: shrink the corrected values by interpolation.
        initial = np.interp( np.linspace(1, len(sce_raindays), expected_sce_raindays), np.linspace(1, len(sce_raindays), len(sce_raindays)), initial)
    else:
        # Too few wet days: pad with leading zeros up to the expected count.
        initial = np.hstack( (np.zeros(expected_sce_raindays - len(sce_raindays)), initial))
    # NOTE(review): argsort is ascending, so this slice addresses the
    # *smallest* scenario values -- confirm that is intended rather than the
    # largest ``expected_sce_raindays`` entries.
    correction[sce_argsort[:expected_sce_raindays]] = initial
    #correction = pd.Series(correction, index=sce_data.index)
    return correction
def test_multiple_levels(self) -> None:
    """Check semidiscrete MI estimates when Y takes three levels.

    X is a Gamma random variable, Y takes levels based on X, and W is X with
    added Gaussian noise.  Conditioning on the noise increases the MI.
    """
    generator = np.random.default_rng(54)
    latent = generator.gamma(shape=1.5, scale=1.0, size=2000)
    noise = generator.normal(size=latent.shape)
    observed = latent + noise

    # The 1e-4 level would cause issues with the continuous-continuous
    # algorithm as it would be picked up in neighbor searches on the y=0 plane.
    levels = np.zeros(latent.shape)
    levels[latent < 0.5] = 1e-4
    levels[latent > 2.0] = 4

    mi_unconditional = _estimate_semidiscrete_mi(observed, levels, k=1)
    mi_conditional = _estimate_conditional_semidiscrete_mi(
        observed, levels, noise, k=1)

    # The expected MI is the discrete entropy of Y.
    prob_low = gamma_dist.cdf(0.5, a=1.5)
    prob_high = 1 - gamma_dist.cdf(2.0, a=1.5)
    prob_mid = 1 - prob_low - prob_high
    entropy = (-log(prob_low) * prob_low - log(prob_mid) * prob_mid
               - log(prob_high) * prob_high)

    self.assertLess(mi_unconditional, mi_conditional - 0.6)
    self.assertAlmostEqual(mi_conditional, entropy, delta=0.06)
def truncgammaprior_pdf(data, c, scale):
    """Gamma density renormalised to the interval [0, 1), with a tiny floor.

    Args:
        data: Scalar or array of evaluation points.
        c: Gamma shape parameter.
        scale: Gamma scale parameter.

    Returns:
        The gamma pdf divided by the probability mass between 0 and 1 for
        points ``< 1.0`` and zero elsewhere, plus ``1e-200`` so the result is
        never exactly zero.
    """
    floor = 1e-200
    mass_below_one = gamma.cdf(1.0, c, scale=scale, loc=0.0)
    mass_below_zero = gamma.cdf(0.0, c, scale=scale, loc=0.0)
    density = gamma.pdf(data, c, scale=scale, loc=0.0)
    in_support = data < 1.0
    truncated = (density / (mass_below_one - mass_below_zero)) * in_support
    return truncated + floor
def parametrized(a, No):
    """Weighted two-term probability for radius ``a`` and noise level ``No``.

    ``gamma.cdf(x, 1, 0, No)`` is called as ``cdf(x, a, loc=0, scale=1)``,
    i.e. shape 1, loc 0, scale ``No``.  The result mixes the complement of
    that CDF (weight 9/16) with the complement of an average of that CDF and
    an ``erf`` term (weight 7/16).  Presumably an error-probability estimate
    -- TODO confirm with the caller.
    """
    sigma = math.sqrt(No / 2)
    radial_cdf = gamma.cdf(a**2, 1, 0, No)
    erf_value = math.erf(a / (sigma) / math.sqrt(2))
    erf_half = (1 / 2) * (1 - 2 * (0.5 - erf_value / 2))
    first_term = (9 / 16) * (1 - radial_cdf)
    second_term = (7 / 16) * (1 - (radial_cdf / 2 + erf_half))
    return first_term + second_term
def calc_r_bound(self, logk: float, b_shift: float, pABar: float):
    """Return the remaining radius for a log-scale ``logk`` and shift ``b_shift``.

    A gamma quantile with shape ``(input_dim + 1) / 2`` is taken at ``pABar``
    (or its complement when ``logk >= 0``), scaled by ``exp(2 * logk)`` and
    pushed back through the gamma CDF to get ``pBBar``.  When
    ``pBBar > 0.5`` a margin ``norm.ppf(pBBar) ** 2`` is reduced by the
    ``logk`` and ``b_shift`` terms; a positive margin yields
    ``sigma_b * exp(-logk) * sqrt(margin)``.  Every other case returns 0.0.

    Intermediate values are printed to stdout.

    Raises:
        AssertionError: if ``sigma_k`` (resp. ``sigma_b``) is not above
            ``EPS`` while ``logk`` (resp. ``b_shift``) is not within ``EPS``
            of zero.
    """
    half_dim = (self.input_dim + 1) / 2.0
    if logk >= 0.0:
        t = Gamma.ppf(1. - pABar, half_dim)
        scaled_t = math.exp(2. * logk) * t
        print(t, '=>', scaled_t)
        pBBar = 1.0 - Gamma.cdf(scaled_t, half_dim)
    else:
        t = Gamma.ppf(pABar, half_dim)
        scaled_t = math.exp(2. * logk) * t
        print(t, '=>', scaled_t)
        pBBar = Gamma.cdf(scaled_t, half_dim)

    # Written as "not >" so a NaN pBBar still takes the 0.0 exit.
    if not pBBar > 0.5:
        return 0.0

    margin = norm.ppf(pBBar)**2

    if self.sigma_k > EPS:
        margin -= (logk / self.sigma_k)**2
    else:
        assert abs(logk) < EPS

    if self.sigma_b > EPS:
        margin -= (math.exp(logk) * b_shift / self.sigma_b)**2
    else:
        assert abs(b_shift) < EPS

    if not margin > 0.0:
        return 0.0

    print(
        f'remain r = { self.sigma_b * math.exp(-logk) * math.sqrt(margin) }'
    )
    return self.sigma_b * math.exp(-logk) * math.sqrt(margin)
def transProb(self, stateFrom, stateTo, inspItvl):
    """Probability of moving from ``stateFrom`` to ``stateTo`` in one interval.

    States split ``[0, failTsh]`` into ``nStates - 1`` equal degradation
    steps.  The degradation increment over ``inspItvl`` is evaluated with a
    gamma CDF of shape ``gammaAlpha * inspItvl`` and scale ``gammaBeta``,
    measured from the lower bound of ``stateFrom``.

    Returns:
        0 when ``stateFrom > stateTo``; the upper-tail probability beyond
        ``failTsh`` when ``stateTo`` is the last state; otherwise the CDF
        difference between the upper and lower bound of ``stateTo``.
    """
    if stateFrom > stateTo:
        return 0

    step = self.failTsh / (self.nStates - 1)  # width of one normal state
    start = stateFrom * step  # lower degradation bound of the origin state
    shape = self.gammaAlpha * inspItvl

    def increment_cdf(delta):
        return gamma.cdf(delta, shape, scale=self.gammaBeta)

    if stateTo >= self.nStates - 1:
        return 1 - increment_cdf(self.failTsh - start)

    upper = (stateTo + 1) * step  # upper degradation bound of the target
    lower = stateTo * step  # lower degradation bound of the target
    return increment_cdf(upper - start) - increment_cdf(lower - start)
def crps_berngamma(y, p, shape, scale):
    """Closed-form score for a mixture with weight ``p`` on a gamma component.

    Presumably the CRPS of a Bernoulli-gamma distribution (per the function
    name) -- TODO confirm the reference formula.

    Args:
        y: Observed value(s).
        p: Weight of the gamma component.
        shape: Gamma shape parameter.
        scale: Gamma scale parameter.

    Returns:
        The score evaluated at ``y``.
    """
    gamma_mean = shape * scale
    cdf_k = gamma.cdf(y, shape, scale=scale)
    cdf_k1 = gamma.cdf(y, shape + 1, scale=scale)
    beta_value = sc.beta(shape + 0.5, 0.5)
    return (2 * p * y * cdf_k
            - p * gamma_mean * 2 * cdf_k1
            + y * (1 - 2 * p)
            + p * p * gamma_mean
            - p * p * gamma_mean * (1 / math.pi) * beta_value)
def proposal(dmin, No):
    """Weighted two-term probability using weights 1012/1024 and 12/1024.

    ``R = dmin / 2`` is used as the radius.  ``gamma.cdf(x, 1, 0, No)`` is
    called as ``cdf(x, a, loc=0, scale=1)``, i.e. shape 1, loc 0, scale
    ``No``.  Same expression as ``upper_bound`` apart from the weights.
    """
    radius = dmin / 2
    sigma = math.sqrt(No / 2)
    radial_cdf = gamma.cdf(radius**2, 1, 0, No)
    erf_value = math.erf(dmin / (2 * sigma) / math.sqrt(2))
    erf_half = (1 / 2) * (1 - 2 * (0.5 - erf_value / 2))
    first_term = (1012 / 1024) * (1 - radial_cdf)
    second_term = (12 / 1024) * (1 - (radial_cdf / 2 + erf_half))
    return first_term + second_term
def upper_bound(dmin, No):
    """Weighted two-term probability using weights 9/16 and 7/16.

    ``R = dmin / 2`` is used as the radius.  ``gamma.cdf(x, 1, 0, No)`` is
    called as ``cdf(x, a, loc=0, scale=1)``, i.e. shape 1, loc 0, scale
    ``No``.
    """
    radius = dmin / 2
    sigma = math.sqrt(No / 2)
    radial_cdf = gamma.cdf(radius**2, 1, 0, No)
    erf_value = math.erf(dmin / (2 * sigma) / math.sqrt(2))
    erf_half = (1 / 2) * (1 - 2 * (0.5 - erf_value / 2))
    first_term = (9 / 16) * (1 - radial_cdf)
    second_term = (7 / 16) * (1 - (radial_cdf / 2 + erf_half))
    return first_term + second_term
def sum_pools_and_contacts(function, person, all_pools, contact_rates, infectious_period_length, transmission_probability, mean_transmission_probability, overdispersion, estimated_mean=None):
    """Sum a per-contact quantity over everyone sharing a pool with ``person``.

    Args:
        function: ``"mean"`` or ``"variance"`` without overdispersion;
            ``"mean"``, ``"ev"`` (E[Var(Y | X)]) or ``"ve"`` (Var(E[Y | X]))
            with overdispersion.  Other values add nothing.
        person: Mapping with ``"id"``, ``"age"`` and one ``"<pool_type>_id"``
            entry per pool type.
        all_pools: ``{pool_type: {pool_id: members}}`` where each member is
            indexable as ``(id, age)``.
        contact_rates: ``{pool_type: {age: rate}}``.
        infectious_period_length: Exponent applied to the per-step
            no-transmission probability.
        transmission_probability: Value at which the gamma-weighted
            expressions are evaluated (overdispersed case).
        mean_transmission_probability: Mean transmission probability.
        overdispersion: Gamma shape, or ``None`` for no overdispersion.
        estimated_mean: Subtracted from the sum when ``function == "ve"``.

    Returns:
        ``(person_id, result)``.
    """
    total = 0
    own_id = person["id"]
    own_age = person["age"]

    # Gamma density on the transmission probability, renormalised to [0, 1]
    # through the (cdf1 - cdf0) divisor used below.
    if overdispersion is not None:
        gamma_shape = overdispersion
        gamma_scale = mean_transmission_probability / gamma_shape
        pdf_tp = gamma.pdf(transmission_probability, gamma_shape, scale=gamma_scale)
        cdf1 = gamma.cdf(1, gamma_shape, scale=gamma_scale)
        cdf0 = gamma.cdf(0, gamma_shape, scale=gamma_scale)

    for pool_type, pools in all_pools.items():
        pool_id = person[pool_type + "_id"]
        if not pool_id > 0:
            continue
        members = pools[pool_id]
        member_count = len(members)

        for member in members:
            other_id = member[0]
            other_age = member[1]
            if other_id == own_id:
                continue  # never pair a person with themselves

            # Aggregated contacts by age: the smaller of the two per-member
            # rates is used as the contact probability.
            own_probability = contact_rates[pool_type][own_age] / (member_count - 1)
            other_probability = contact_rates[pool_type][other_age] / (member_count - 1)
            contact_probability = min(own_probability, other_probability)
            # Households are assumed to be fully connected in Stride.
            if pool_type == "household":
                contact_probability = 0.999
            if contact_probability >= 1:
                contact_probability = 0.999

            def escape(probability):
                # Probability of no transmission over the infectious period.
                return (1 - (probability * contact_probability))**infectious_period_length

            if overdispersion is None:
                if function == "mean":
                    total += (1 - escape(mean_transmission_probability))
                elif function == "variance":
                    total += escape(mean_transmission_probability) * (
                        1 - escape(mean_transmission_probability))
            elif function == "mean":
                total += (1 - escape(transmission_probability) * (pdf_tp / (cdf1 - cdf0)))
            elif function == "ev":
                total += escape(transmission_probability) * (
                    1 - escape(transmission_probability)) * (pdf_tp / (cdf1 - cdf0))
            elif function == "ve":
                total += (1 - escape(transmission_probability))

    # Var(E[Y | X]): squared deviation from the mean, weighted by the density.
    if function == "ve":
        total = ((total - estimated_mean)**2) * (pdf_tp / (cdf1 - cdf0))

    return (own_id, total)
def DiscreteShiftedGammaSIDistr(self, k):
    """Probability mass at ``k`` of a discretised gamma shifted by one.

    The gamma shape and scale are derived from ``self.MeanSI - 1`` and
    ``self.sdSI``.

    Returns:
        The mass for ``k >= 2`` and ``k == 1``, ``0`` for ``k == 0`` and
        ``None`` for any other ``k`` (no branch matches).
    """
    shifted_mean = self.MeanSI - 1
    variance = self.sdSI * self.sdSI
    a = shifted_mean * shifted_mean / variance
    b = variance / shifted_mean

    def cdf(x):
        return gamma.cdf(x, a, scale=b)

    def cdf_next_shape(x):
        return gamma.cdf(x, a + 1, scale=b)

    if k >= 2:
        return (k * cdf(k) + (k - 2) * cdf(k - 2)
                - 2 * (k - 1) * cdf(k - 1)
                + a * b * (2 * cdf_next_shape(k - 1)
                           - cdf_next_shape(k - 2)
                           - cdf_next_shape(k)))
    if k == 1:
        return k * cdf(k) - a * b * cdf_next_shape(k)
    if k == 0:
        return 0
    return None
def lower_bound(dmin, No):
    """Same expression as the upper bound, evaluated at a fixed circumradius.

    The radius is hard-coded to 1.1547, so ``dmin`` is accepted but not used.
    ``gamma.cdf(x, 1, 0, No)`` is called as ``cdf(x, a, loc=0, scale=1)``,
    i.e. shape 1, loc 0, scale ``No``.
    """
    circumradius = 1.1547
    sigma = math.sqrt(No / 2)
    radial_cdf = gamma.cdf(circumradius**2, 1, 0, No)
    erf_value = math.erf(circumradius / (sigma) / math.sqrt(2))
    erf_half = (1 / 2) * (1 - 2 * (0.5 - erf_value / 2))
    first_term = (9 / 16) * (1 - radial_cdf)
    second_term = (7 / 16) * (1 - (radial_cdf / 2 + erf_half))
    return first_term + second_term
def truncgammaprior_pdf(data, prior, c, scale):
    """Mixture of a point mass at 1.0 and a gamma density renormalised to [0, 1).

    Args:
        data: Scalar or array of evaluation points.
        prior: Weight of the point mass at exactly 1.0.
        c: Gamma shape parameter.
        scale: Gamma scale parameter.

    Returns:
        ``prior`` where ``data == 1.0``, ``(1 - prior)`` times the
        renormalised gamma pdf where ``data < 1.0``, zero elsewhere; ``1e-200``
        is added everywhere so the result is never exactly zero.
    """
    floor = 1e-200
    at_one = data == 1.0
    below_one = data < 1.0
    mass_below_one = gamma.cdf(1.0, c, scale=scale, loc=0.0)
    mass_below_zero = gamma.cdf(0.0, c, scale=scale, loc=0.0)
    density = gamma.pdf(data, c, scale=scale, loc=0.0)
    point_mass = prior * at_one
    continuous_part = (1 - prior) * (density / (mass_below_one - mass_below_zero)) * below_one
    return point_mass + continuous_part + floor
def simHawkesOneDay(
    mu: float,
    alpha: float,
    beta: float,
    R0: np.ndarray,
    nrTrainingDays: int,
    day: int,
    cases: np.ndarray,
    config: EMConfig,
    threshold: float = 1e-5,
) -> np.ndarray:
    """Simulate the number of triggered cases for one day.

    The incubation kernel is discretised into daily bins (CDF differences at
    +/- 0.5 around each lag), bins at or below ``threshold`` are dropped, and
    the remaining weights are multiplied with ``R0 * cases`` over the most
    recent days.  The rounded sum plus the background rate ``mu`` is the mean
    of a Poisson draw, capped at ``swissPopulation``.

    Args:
        mu: Background rate added to the triggered intensity.
        alpha: Scale of the kernel when the distribution is ``"normal"``.
        beta: Location of the kernel when the distribution is ``"normal"``.
        R0: Per-day reproduction values, indexed by day.
        nrTrainingDays: Number of training days.
        day: Offset of the simulated day after the training period.
        cases: Per-day case counts, indexed by day.
        config: Provides ``incubationDistribution`` (``"weibull"``,
            ``"gamma"``, ``"lognormal"`` or ``"normal"``).
        threshold: Kernel weights not above this value are discarded.

    Returns:
        The simulated count (a scalar; NOTE(review): the ``np.ndarray``
        return annotation does not match this).

    Raises:
        AssertionError: if ``cases`` has fewer rows than ``nrTrainingDays``.
        NotImplementedError: for an unknown incubation distribution.
    """
    assert (cases.shape[0] >= nrTrainingDays
            ), "The number of cases does not match the number of training days"

    # Lag in days from each past day to the simulated day (oldest first).
    timestamps = nrTrainingDays + day - np.array(range(nrTrainingDays + day))

    distribution = config.incubationDistribution
    if distribution == "weibull":
        def kernelCdf(t):
            return weibull_min.cdf(t, c=2.453, scale=6.258)
    elif distribution == "gamma":
        def kernelCdf(t):
            return gamma.cdf(t, a=5.807, scale=0.948)
    elif distribution == "lognormal":
        # Local names: the original assigned ``mu = 1.63`` here, which
        # silently replaced the background rate ``mu`` used further down.
        lognormSigma = 0.5
        lognormMu = 1.63

        def kernelCdf(t):
            return lognorm.cdf(t, s=lognormSigma, scale=np.exp(lognormMu))
    elif distribution == "normal":
        def kernelCdf(t):
            return norm.cdf(t, scale=alpha, loc=beta)
    else:
        raise NotImplementedError

    intensity = kernelCdf(timestamps + 0.5) - kernelCdf(timestamps - 0.5)
    # The most recent day also receives the mass below 0.5.
    intensity[-1] += kernelCdf(0.5)

    intensity = intensity[intensity > threshold].reshape(-1, 1)
    kernelRange = list(
        range(nrTrainingDays + day - intensity.shape[0], nrTrainingDays + day))
    intensityDay = intensity * np.array(
        R0[kernelRange].T * cases[kernelRange]).reshape(-1, 1)
    intensityDay = np.round(np.sum(intensityDay) + mu)
    # TODO: why draw from a Poisson distribution here instead of just taking
    # the expectation? Maybe for a confidence interval.
    nrTriggeredCases = np.random.poisson(intensityDay)
    nrTriggeredCases = min(nrTriggeredCases, swissPopulation)
    return nrTriggeredCases
def discretized(N = 21):
    """Discretizes the continuous Gamma distribution. Slots of size 1.

    The parameters come from ``continuous()['gamma']`` and are unpacked as
    positional arguments of ``gamma.cdf``.  Slot ``i`` receives the mass
    between ``i`` and ``i + 1``.

    Args:
        N (int): Max number. By default 21.

    Returns:
        (pandas.DataFrame): Dataframe with columns ``x`` (0 .. N-1) and
        ``Px`` (probability mass of each slot).
    """
    # Look the parameters up once; the original re-ran continuous() twice per
    # slot and evaluated every interior CDF value twice.
    params = tuple(continuous()['gamma'])
    cumulative = [gamma.cdf(edge, *params) for edge in range(N + 1)]
    probs = [upper - lower for lower, upper in zip(cumulative, cumulative[1:])]
    distribution = pd.DataFrame({'x': range(N), 'Px': probs})
    return distribution
def run_all_honest_count(self,thresh):
    """
    In this scenario, everyone is honest and we just mine. We count the
    amount of traffic generated, and therefore the output CSV is different
    than other methods.

    Random proofs are drawn until ``self.check_for_block`` reports a block;
    one CSV line with the counters is then written to the global ``fd``.

    thresh: gamma-CDF threshold; proofs whose CDF value is <= thresh are
        counted in ``gamma_count``.

    Returns an empty tuple once a block is found.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; in particular the ``else`` is attached to
    ``if len(mempool)>=self.k`` -- confirm against the original file.
    """
    global RANDOMNUM, fd
    public_vals = []
    total_hash_cnt = 0
    kt_count = 0 #amount of traffic below k*t bound
    smallestk_count = 0 # amount of traffic when proof is among smallest seen (and below k*t)
    both_count =0
    gamma_count = 0
    max_gamma = 0
    # miners / mining_power are assigned but not used in this method.
    miners = [HONEST]
    mining_power = [1]
    prop_delay = self.theta/600*10
    while (True):
        # Draw one proof value and count the attempt.
        val= random.getrandbits(self.bits)
        total_hash_cnt+=1
        if val<= self.k*self.target:
            kt_count+=1
        # Records whose age (in hash attempts) is at least prop_delay.
        mempool= [x for x in public_vals if ((total_hash_cnt-x[TIME])>=prop_delay) ]
        # Smallest proof seen so far; 2**bits when nothing has been seen yet.
        los = min([x[PROOF] for x in public_vals]+[2**self.bits])
        val_record = [val,0,los,total_hash_cnt]
        heapq.heappush(public_vals,val_record)
        # Keep only the k smallest records.
        public_vals=heapq.nsmallest(self.k,public_vals)
        if len(mempool)>=self.k:
            # Largest of the k smallest records in the mempool.
            largest = heapq.nsmallest(self.k,mempool)[-1]
            if val < largest[PROOF]:
                smallestk_count +=1
                if val <= self.k*self.target:
                    both_count+=1
        else:
            # Fewer than k records in the mempool: every proof is counted.
            smallestk_count +=1
            if val <= self.k*self.target:
                both_count+=1
        gamma_val = gamma.cdf(val,a=self.k,scale=(2**self.bits)/self.theta)
        if gamma_val<=thresh:
            gamma_count+=1
        public_block= self.check_for_block(public_vals,0)
        if public_block is not None: #we found a block
            max_gamma = gamma.cdf(public_block[-1][PROOF],a=self.k,scale=(2**self.bits)/self.theta)
            fd.write("%d, %f, %d, %d, %d, %d, %d, %f\n" % (self.k, thresh, total_hash_cnt, kt_count, smallestk_count, both_count, gamma_count, max_gamma))
            return()
def precomputeKernelPDF(alpha: float, beta: float, nrTrainingDays: int,
                        config: EMConfig) -> np.ndarray:
    """Build the strictly lower-triangular matrix of discretised kernel weights.

    Entry ``[i, j]`` (``j < i``) is the kernel mass between ``lag - 0.5`` and
    ``lag + 0.5`` with ``lag = i - j``; for ``lag == 1`` the lower edge is 0
    instead of 0.5.  All other entries stay zero.

    Args:
        alpha: ``c`` (weibull), ``a`` (gamma), ``s`` (lognormal) or ``scale``
            (normal) of the kernel.
        beta: ``scale`` of the kernel, or ``loc`` for the normal kernel.
        nrTrainingDays: Size of the square matrix.
        config: Provides ``incubationDistribution``.

    Raises:
        NotImplementedError: for an unknown incubation distribution.
    """
    kernelPDF = np.zeros((nrTrainingDays, nrTrainingDays))

    cdfByName = {
        "weibull": lambda t: weibull_min.cdf(t, c=alpha, scale=beta),
        "gamma": lambda t: gamma.cdf(t, a=alpha, scale=beta),
        "lognormal": lambda t: lognorm.cdf(t, s=alpha, scale=beta),
        "normal": lambda t: norm.cdf(t, scale=alpha, loc=beta),
    }
    kernelCdf = cdfByName.get(config.incubationDistribution)
    if kernelCdf is None:
        raise NotImplementedError

    for i in range(nrTrainingDays):
        for j in range(i):
            lag = i - j
            # The first bin starts at 0 so no mass below 0.5 is lost.
            lowerEdge = lag - 1 if lag == 1 else lag - 0.5
            kernelPDF[i, j] = kernelCdf(lag + 0.5) - kernelCdf(lowerEdge)
    return kernelPDF
def Gamma_CDF_func(x, k, theta):
    '''
    Gamma CDF with a fixed location of 1.

    x = evaluation point(s)
    k = shape parameter (sometimes called a)
    theta = scale parameter (related to the rate b=1/theta)

    The location parameter is not an argument; it is hard-coded to 1.
    '''
    fixed_location = 1.
    cumulative = gamma.cdf(x, k, loc=fixed_location, scale=theta)
    return cumulative
def sample_training_points(self, thetas_per_batch, samples_per_theta):
    """Draw gamma samples and their CDF values for random shape parameters.

    Shapes are drawn uniformly from ``[-theta_max, theta_max)``; negative
    draws are replaced by their exponential, so every shape is positive.

    Args:
        thetas_per_batch: Number of shape parameters to draw.
        samples_per_theta: Number of gamma samples per shape.

    Returns:
        ``(z, u, theta)``: flat arrays of the samples, of ``gamma.cdf`` at
        those samples, and of the shape used for each sample.
    """
    # Sample thetas; map the negative half through exp().
    shapes = np.random.random(
        thetas_per_batch) * 2 * self.theta_max - self.theta_max
    negative = shapes < 0
    shapes[negative] = np.exp(shapes[negative])

    draws_per_shape = []
    targets_per_shape = []
    for shape_value in shapes:
        draws = np.random.gamma(shape=shape_value, size=samples_per_theta)
        draws_per_shape.append(draws)
        # Target u is the CDF of the draw under its own shape.
        targets_per_shape.append(gamma.cdf(x=draws, a=shape_value))

    z = np.concatenate(draws_per_shape)
    u = np.concatenate(targets_per_shape)
    # Up-sample theta so it lines up with z and u.
    theta = np.repeat(shapes, samples_per_theta)
    return z, u, theta
def probability_of_ruin(return_mean, return_stddev, life_expectancy, withdrawal_pct):
    """
    Milevsky and Robinson's Stochastic Present Value from
    "A sustainable spending rate without simulation" (2005)

    In "A Gentle Introduction to the Calculus of Retirement Income" Milevsky
    calls this the "risk quotient"

    return_mean: the real return of the portfolio (e.g. .07)
    return_stddev: the volatility of the portfolio (e.g. .20)
    life_expectancy: the median remaining lifespan. i.e. what 50% of the
        population will live to. (e.g. 23).  The yearly mortality rate is
        derived from it as ln(2) / life_expectancy.
    withdrawal_pct: the point at which the gamma CDF is evaluated (e.g. .05)

    >>> probability_of_ruin(.07, .20, 28.1, .05)
    0.26785503502422264

    >>> probability_of_ruin(.0520, .1182, 22.30, .04)
    0.097782639821254749

    >>> probability_of_ruin(.0470, .1382, 22.30, .04)
    0.15435694850153159

    >>> probability_of_ruin(.049, .10, 22.30, .04)
    0.089454318224481758
    """
    mortality_rate = math.log(2) / life_expectancy
    variance_plus_mortality = (return_stddev * return_stddev) + mortality_rate
    gamma_shape = ((2 * return_mean) + (4 * mortality_rate)) / variance_plus_mortality - 1
    gamma_scale = variance_plus_mortality / 2
    return gamma.cdf(withdrawal_pct, gamma_shape, scale=gamma_scale)
def extractInfoFromFile(fnameF,noise,dim,candFlag = False):
    """Read an optimisation log (CSV) and summarise its final state.

    Column 1 is read as the fitness, the last column as ``sense`` and columns
    ``3:-1`` as the candidate vector.  The candidate starts as the mean of
    rows ``-5:-1``; with ``noise > 0`` all three are smoothed exponentially
    over rows 10 onwards.

    Args:
        fnameF: Path of the CSV file (first line is a header).
        noise: Smoothing is applied only when this is > 0.
        dim: 100 normalises the candidate itself; any other value treats the
            candidate as (weight, shape, scale) triples of a gamma mixture.
        candFlag: When true, return only the candidate vector.

    Returns:
        ``cand`` if ``candFlag`` is set, otherwise
        ``([d, cand], sense, fit)`` with ``fit`` negated and ``d`` a
        100-element distribution.
    """
    from scipy.stats import gamma

    table = np.asarray(pd.read_csv(fnameF))
    row_count = len(table[:, 1])

    cand = np.mean(table[-5:-1, 3:-1], 0)
    fit = table[-1, 1]
    sense = table[-1, -1]

    if noise > 0:
        # Exponential smoothing over everything after the first ten rows.
        for row in table[10:row_count]:
            fit = 0.8*fit + 0.2*row[1]
            sense = 0.8*sense + 0.2*row[-1]
            cand = 0.9*cand + 0.1*row[3:-1]

    if candFlag:
        return cand

    fit *= -1

    if dim == 100:
        d = cand/np.sum(cand)
        return [d,cand],sense,fit

    grid = np.linspace(0.01, 10000., num=100)  # values for x-axis
    mixture_cdf = np.zeros(100)
    weight_sum = 0
    for start in range(0,len(cand)-1,3):
        weight = cand[start]
        mixture_cdf += weight*gamma.cdf(grid, cand[start+1], loc=0, scale=cand[start+2])
        weight_sum += weight
    # Difference the mixture CDF into per-bin mass, then normalise.
    d = np.diff(np.concatenate([[0],mixture_cdf]))
    d = d/weight_sum
    return [d,cand],sense,fit
def truncated_gamma_logpdf(
        a, scale, eta, ts_above_eta, N_above_eta):
    """Calculates the -log(likelihood) of a sample of random numbers
    generated from a gamma pdf truncated from below at x=eta.

    The normalisation of the truncated pdf is the upper-tail probability
    ``P(X > eta)``.  It is evaluated with ``gamma.logsf`` instead of
    ``log(1 / (1 - cdf))`` so the result stays finite when ``eta`` lies far in
    the tail and ``cdf`` rounds to 1.

    Parameters
    ----------
    a : float
        Shape parameter.
    scale : float
        Scale parameter.
    eta : float
        Test-statistic value at which the gamma function is truncated from
        below.
    ts_above_eta : (n_trials,)-shaped 1D ndarray
        The ndarray holding the test-statistic values falling in the truncated
        gamma pdf.
    N_above_eta : int
        Number of test-statistic values falling in the truncated gamma pdf.

    Returns
    -------
    -logl : float
    """
    # log(1 / sf(eta)) == -logsf(eta), without the 1 - cdf cancellation.
    log_norm = -gamma.logsf(eta, a=a, scale=scale)
    logl = N_above_eta*log_norm + np.sum(gamma.logpdf(ts_above_eta, a=a, scale=scale))
    return -logl
def get_batch_probability(self, lengths, query_nodes, labeled_nodes):
    """Probability of each observed length under per-pair gamma models.

    ``self._distributions[query, labeled]`` supplies ``(shape, scale,
    zero_prob)`` for each pair.  A length of 0 gets ``zero_prob``; a non-zero
    length gets twice the smaller gamma tail, scaled by ``1 - zero_prob``.
    Non-positive results are replaced with ``ZERO_REPLACE``.

    Args:
        lengths: Non-negative integer lengths (cast to ``uint32``).
        query_nodes: Query node per length.
        labeled_nodes: Labeled node per length.

    Returns:
        ``float64`` array with one probability per input triple.

    Raises:
        AssertionError: if the inputs differ in length or are empty.
    """
    assert len(lengths) == len(query_nodes) == len(labeled_nodes)
    assert len(lengths) > 0

    lengths = np.array(lengths, dtype=np.uint32)
    is_zero = (lengths == 0)
    is_nonzero = np.invert(is_zero)

    lookup = self._distributions
    per_pair = [lookup[query, labeled]
                for query, labeled in zip(query_nodes, labeled_nodes)]
    columns = list(zip(*per_pair))
    shapes = np.array(columns[0], dtype=np.float64)
    scales = np.array(columns[1], dtype=np.float64)
    zero_prob = np.array(columns[2], dtype=np.float64)
    del columns

    probabilities = np.empty_like(lengths, dtype=np.float64)
    probabilities[is_zero] = zero_prob[is_zero]

    lower_tail = gamma.cdf(lengths[is_nonzero],
                           a=shapes[is_nonzero],
                           scale=scales[is_nonzero])
    # Fold onto the smaller tail, then double it (two-sided probability).
    smaller_tail = np.where(lower_tail > 0.5, 1 - lower_tail, lower_tail)
    probabilities[is_nonzero] = smaller_tail * 2 * (1 - zero_prob[is_nonzero])

    probabilities[probabilities <= 0.0] = ZERO_REPLACE
    return probabilities
def fit_gamma_param(df, xmin, mes, year_test='None', option=0):
    """Fit a gamma distribution to wet-day precipitation around month ``mes``.

    Rows whose ``month`` is ``mes`` or one of its two neighbours (wrapping
    around the year) are selected; with ``year_test`` given, rows whose
    ``Fecha`` falls inside that calendar year are left out.  Values that are
    NaN or not above ``xmin`` are discarded before fitting with ``loc``
    fixed at 0.

    Args:
        df: DataFrame with ``month`` and ``precip`` columns (and ``Fecha``
            when ``year_test`` is used).
        xmin: Wet-day threshold (strictly greater than).
        mes: Central month number.
        year_test: Year to exclude, or the string ``'None'`` for no exclusion.
        option: 0 returns only the parameters; anything else also returns the
            wet-day values and their capped CDF.

    Returns:
        ``param_gamma`` or ``(param_gamma, precdias, gamma_cdf)``.
    """
    cdf_cap = .9999999

    # Three-month window centred on `mes`, wrapping at the year boundary.
    if mes <= 1:
        window = [12, 1, 2]
    elif mes >= 12:
        window = [11, 12, 1]
    else:
        window = [mes - 1, mes, mes + 1]

    in_window = df['month'].isin(window)
    if year_test == 'None':
        selected = in_window
    else:
        year_start = '01/01/' + str(year_test)
        year_end = '12/31/' + str(year_test)
        in_test_year = np.logical_and(df.Fecha >= year_start,
                                      df.Fecha <= year_end)
        # Inside the window but outside the held-out year.
        selected = np.logical_and(in_window, np.logical_not(in_test_year))
    values = df.loc[selected, 'precip'].values

    # Days with precipitation: not NaN and above the threshold.
    is_wet = np.array([(not np.isnan(v)) and v > xmin for v in values],
                      dtype=bool)
    wet_values = values[is_wet]

    # Fit a Gamma distribution over days with precipitation.
    params = gamma.fit(wet_values, floc=0)
    capped_cdf = gamma.cdf(np.sort(wet_values), *params)
    capped_cdf[capped_cdf > cdf_cap] = cdf_cap

    if option == 0:
        return params
    return params, wet_values, capped_cdf
def kernel_based_indepence(x, y, eigv_samples=1000, approximate=True):
    """Kernel-based dependence measure between ``x`` and ``y``.

    Both inputs are turned into kernel matrices with ``kernel_matrix``.

    * ``approximate=True``: returns the CDF of a moment-matched gamma
      distribution at ``trace(kx @ ky)``.
    * ``approximate=False``: draws chi-square samples weighted by products
      of the kernel eigenvalues, runs a one-sample t-test against
      ``trace(kx @ ky) / n`` and returns a one-sided value derived from it.
      The t statistic and both means are printed.

    Args:
        x: 2-D array, one row per sample.
        y: Array with the same number of rows as ``x`` (presumably 2-D --
            TODO confirm against ``kernel_matrix``).
        eigv_samples: Number of chi-square draws in the non-approximate path.
        approximate: Selects the gamma approximation.
    """
    n, _ = x.shape
    gram_x = kernel_matrix(x)
    gram_y = kernel_matrix(y)

    if approximate:
        mean_appr = np.trace(gram_x) * np.trace(gram_y) / n
        var_appr = 2 * (n - 4) * (n - 5) * np.linalg.norm(
            gram_x) * np.linalg.norm(gram_y) / (n**4)
        # Moment matching: shape = mean^2 / var, scale = var / mean.
        gamma_shape = mean_appr * mean_appr / var_appr
        gamma_scale = var_appr / mean_appr
        statistic = np.trace(gram_x @ gram_y)
        return gamma.cdf(statistic, a=gamma_shape, scale=gamma_scale)

    eig_x = np.linalg.eigvalsh(gram_x)
    eig_y = np.linalg.eigvalsh(gram_y)
    chi_draws = np.random.chisquare(1, (n * n, eigv_samples))
    eig_products = np.outer(eig_x, eig_y).flatten()
    null_samples = np.dot(eig_products, chi_draws) / (n * n)
    observed = 1 / n * np.trace(gram_x @ gram_y)

    t, p_value = ttest_1samp(null_samples, observed)
    print(t, np.mean(null_samples), observed)
    # Convert the two-sided p-value into a one-sided one by the sign of t.
    return 1 - p_value / 2 if t < 0 else p_value / 2
def _get_u_grid(self, recalculate=False): # Create Grid of numerical values for moment_update # Grid is based on Gamma(alpha=df/2, beta=df/2) if not hasattr(self, "_u_grid") or recalculate: # Define _u_grid q = (np.arange(0, self.breaks) + 0.5) / (self.breaks * 1.0) * 0.98 + 0.01 q = np.concatenate(( np.logspace(-5, -2, 5), q, 1.0 - np.logspace(-5, -2, 5)[::-1], )) alpha = self.parameter.df / 2.0 beta = self.parameter.df / 2.0 self._u_grid = gamma.ppf(q=q, a=alpha, scale=1.0 / beta) # Define _u_weights mid_points = (self._u_grid[1:] + self._u_grid[:-1]) / 2.0 cdf = np.concatenate(( np.zeros(1), gamma.cdf(x=mid_points, a=alpha, scale=1.0 / beta), np.ones(1), )) self._u_weights = cdf[1:] - cdf[:-1] return self._u_grid, self._u_weights
def _cdf(self, value: float): """ Defines the cumulative gamma distribution function :param value: x-value :return: Function value at point x """ return gamma.cdf(value, a=self._alpha, scale=self._beta)
def pgamma(q,shape,rate=1):
    """
    Cumulative distribution function of the Gamma distribution.

    The rate is applied by evaluating the unit-scale gamma CDF at ``rate*q``.
    """
    from scipy.stats import gamma
    scaled_quantile = rate*q
    return gamma.cdf(x=scaled_quantile,a=shape,loc=0,scale=1)
def p_value(self):
    """Return the gamma CDF of ``n * empirical_test()``, cached after first use.

    The shape comes from ``self.alpha()`` and the scale from ``self.beta()``.
    The cache check is truthiness-based, so a falsy cached value (``None``,
    ``0.0``) triggers a recomputation.

    NOTE(review): this is the lower-tail probability (``cdf``); confirm that
    callers expect that rather than the upper tail.
    """
    if self._p_value:
        return self._p_value
    shape = self.alpha()
    scale = self.beta()
    statistic = self.n * self.empirical_test()
    self._p_value = gamma.cdf(statistic, shape, scale=scale)
    return self._p_value
def cdf(self,dat):
    '''
    Evaluates the cumulative distribution function on the data points in dat.

    The squeezed data are raised to the power ``param['p']`` and passed to a
    gamma CDF with shape ``param['u']`` and scale ``param['s']``.

    :param dat: Data points for which the c.d.f. will be computed.
    :type dat: natter.DataModule.Data
    :returns: A numpy array containing the probabilities.
    :rtype: numpy.array
    '''
    settings = self.param
    powered = squeeze(dat.X)**settings['p']
    return gamma.cdf(powered,settings['u'],scale=settings['s'])
def run(pars):
    """Evaluate stopping and sampling probabilities along one sampling trajectory.

    Args:
        pars: Mapping with ``'data'`` (holding ``'sampledata'`` and
            ``'outcomes'``) and the optional settings ``eval_crit``,
            ``eval_pow``, ``th_shape``, ``th_scale``, ``target_batch`` and
            ``s_batch``.  ``verbose``, ``options`` and ``p_guess`` are
            accepted but have no effect here.

    Returns:
        Dict with ``'pref'`` (cumulative valuation, starting at 0),
        ``'p_stop'``, ``'p_sample_A'`` and ``'p_sample_B'``.
    """
    data = pars.get('data')
    eval_crit = pars.get('eval_crit', 0.)
    eval_pow = pars.get('eval_pow', 1.)
    th_shape = pars.get('th_shape', 2.)
    th_scale = pars.get('th_scale', 1.)
    target_batch = np.round(pars.get('target_batch', 5))  # target total sample size
    batch_slope = pars.get('s_batch', 1.)

    # First evaluate the trajectory: one valuation per observed sample.
    valuations = [0.] + [
        valuation([obs, data['outcomes'][trial]], eval_crit, eval_pow)
        for trial, obs in enumerate(data['sampledata'])
    ]
    pref = np.cumsum(valuations)

    # On each trial, the probability of crossing the boundary is determined
    # by the distribution over separation sizes.
    p_stop = gamma.cdf(np.abs(pref), th_shape, scale=th_scale)
    p_stop[0] = 0.
    p_continue = 1 - p_stop

    # Probability of staying with the current option is based on streak count.
    streaks = np.array(count_streaks(data['sampledata']))
    p_stay = 1. / (1. + np.exp((streaks + 1 - target_batch) * batch_slope))
    p_switch = 1 - p_stay[1:]

    choices = np.array(data['sampledata'])
    chose_a = (choices==0)
    chose_b = (choices==1)

    next_a = p_continue[1:] * (p_stay[1:] * chose_a + p_switch * chose_b)
    next_b = p_continue[1:] * (p_stay[1:] * chose_b + p_switch * chose_a)

    return {'pref': pref,
            'p_stop': p_stop,
            'p_sample_A': np.concatenate(([.5], next_a)),
            'p_sample_B': np.concatenate(([.5], next_b))}
def chi2(self,cmb,egfs,use=None,nparams=0):
    """
    Return a tuple of (chi2, dof, pte, nsig).

    Parameters:
    -----------
    cmb/egfs: The cmb and egfs result.
    use: Which spectra and lranges to include in the chi2.  Entries that are
        None fall back to the matching entry of ``self.use``.
    nparams: Number to subtract from the d.o.f. for calculating the PTE.

    Returns:
    --------
    chi2: ``dcl . inv(cov) . dcl`` for the model-minus-signal residual.
    dof: Number of model points minus ``nparams``.
    pte: Probability to exceed, i.e. the upper tail of a chi-square
        distribution with ``dof`` degrees of freedom at ``chi2``.
    nsig: ``sqrt(2) * erfinv(1 - pte)``.
    """
    if use is None: use=self.use
    use = {key: (value if value is not None else self.use[key])
           for key, value in use.items()}

    cl_model = self.get_cl_model(cmb, egfs).binned(self.signal.binning)
    cl_model_matrix = cl_model.get_as_matrix(lrange=use).spec
    signal_matrix = self.process_signal(self.signal).get_as_matrix(lrange=use)

    dcl = cl_model_matrix - signal_matrix.spec
    chi2_value = dot(dcl,dot(inv(signal_matrix.cov),dcl))

    k = cl_model_matrix.size - nparams
    # A chi-square variable with k d.o.f. is Gamma(shape=k/2, scale=2), so
    # P(X > chi2) = gamma.sf(chi2/2, k/2).  The previous call,
    # gamma.cdf(k/2, chi2/2), had x and shape swapped and used the lower
    # tail.  NOTE(review): assumes ``gamma`` is scipy.stats.gamma -- confirm.
    pte = gamma.sf(chi2_value/2., k/2.)
    nsig = sqrt(2.)*erfinv(1-pte)

    return (chi2_value,k,pte,nsig)
def calcSPI(duration, model, cid):
    """Calculate Standardized Precipitation Index for specified month
    *duration*. Need a climatology of precipitation stored in the database
    used in a VIC *model* simulation.

    Precipitation is read from column 0 of the forcings file of grid cell
    *cid* under ``model.model_path``.  A gamma distribution is fitted to the
    averaged series and its CDF values are mapped through the standard normal
    quantile function.

    Returns an array of zeros (one per simulated day) when *duration* < 1.
    """
    # Number of simulated days, excluding the skipped spin-up years.
    nt = (date(model.endyear, model.endmonth, model.endday) -
          date(model.startyear + model.skipyear, model.startmonth, model.startday)).days + 1
    # tablename = "precip."+model.precip
    if duration < 1:
        print(
            "WARNING! Cannot calculate SPI with {0} months duration.".format(duration))
        spi = np.zeros(nt)
    else:
        # File name is built from the cell's two coordinates, formatted with
        # model.grid_decimal decimals.
        p = np.loadtxt("{0}/forcings/data_{1:.{3}f}_{2:.{3}f}".format(model.model_path, model.gid[cid][0], model.gid[cid][1], model.grid_decimal))[:, 0]
        # One value per day, starting at the model start date.
        p = pandas.Series(p, [date(model.startyear, model.startmonth, model.startday) + timedelta(t) for t in range(len(p))])
        # NOTE(review): pandas.rolling_mean and resample(..., how=...) are
        # legacy pandas APIs that later releases dropped -- confirm the
        # pinned pandas version.  Also confirm that assigning monthly values
        # into this daily slice is intended.
        p[duration:] = pandas.rolling_mean(p.resample(
            'M', how='mean'), duration).values[duration:]
        p[:duration] = 0.0
        # gamma.fit returns (shape, loc, scale), passed positionally to cdf.
        g1, g2, g3 = gamma.fit(p)
        cdf = gamma.cdf(p, g1, g2, g3)
        # Map the probabilities to standard normal quantiles.
        spi = norm.ppf(cdf)
    return spi
def pearscdf(X, mu, sigma, skew, kurt, method, k, output):
    """Return the probability that a Pearson-system variable lies between two limits.

    The distribution is picked from the Pearson system using the mean `mu`,
    standard deviation `sigma`, skewness `skew` and kurtosis `kurt`, and its
    CDF is differenced between ``X[0]`` and ``X[1]``.

    Some combinations of moments are not valid for any random variable, and in
    particular, the kurtosis must be greater than the square of the skewness
    plus 1. The kurtosis of the normal distribution is defined to be 3.

    The distribution types in the Pearson system correspond to the
    following distributions:

        Type 0: Normal distribution
        Type 1: Four-parameter beta
        Type 2: Symmetric four-parameter beta
        Type 3: Three-parameter gamma
        Type 4: Not related to any standard distribution. Density proportional
                to (1+((x-a)/b)^2)^(-c) * exp(-d*arctan((x-a)/b)).
        Type 5: Inverse gamma location-scale
        Type 6: F location-scale
        Type 7: Student's t location-scale

    Parameters
    ----------
    X : array-like of two numbers
        Lower and upper limit; must support arithmetic with scalars
        (e.g. a NumPy array). Either limit may be infinite.
    mu, sigma, skew, kurt : float
        Moments of the distribution.
    method : str
        With ``'MCS'`` the moments are forced to the normal values and the
        type is set to 8, which no branch below handles (an error message
        is printed and the final rescaling then fails on undefined names).
    k : object
        Label of the output, only used in the zero-sigma warning.
    output : object
        Not used by the computation.

    Returns
    -------
    tuple or None
        ``(p, dtype, Inv1, m1, m2, a1, a2, alph, beta, lo, hi)`` where `p` is
        the probability between the limits, `dtype` the Pearson type,
        ``Inv1 = norm.ppf(p)``, `m1`/`m2` the type-specific shape values,
        `a1`/`a2` the roots of the Pearson quadratic, and `alph`, `beta`,
        `lo`, `hi` the parameters and support of a beta approximation.
        When ``sigma == 0`` the tuple is ``(1 or 0, None, inf, None, ...)``.
        When the moments are illegal a message is printed and None is
        returned.

    References
    ----------
    [1] Johnson, N.L., S. Kotz, and N. Balakrishnan (1994) Continuous
        Univariate Distributions, Volume 1, Wiley-Interscience.
    [2] Devroye, L. (1986) Non-Uniform Random Variate Generation,
        Springer-Verlag.
    """
    # Tail probabilities used for the reported support; they are the
    # standard-normal CDF at -4.5 and +4.5.
    tail_lo = 3.39767E-06
    tail_hi = 0.999996602

    # Which limits are finite decides how the Pearson IV probability is formed.
    if X[1] == inf:
        cdist = 1
    elif X[0] == -inf:
        cdist = 2
    else:
        cdist = 3

    if sigma == 0:
        print("Warning: The standard deviation of output distribution %s is zero. "
              "No distribution or correlation can be calculated for it." % (k,))
        if mu >= X[0] and mu <= X[1]:   # mean is in the limits
            return 1, None, inf, None, None, None, None, None, None, None, None
        else:                           # mean is outside the limits
            return 0, None, inf, None, None, None, None, None, None, None, None

    X = (X - mu) / sigma    # Z-score

    if method == 'MCS':
        beta1 = 0
        beta2 = 3
        beta3 = sigma ** 2
    else:
        beta1 = skew ** 2
        beta2 = kurt
        beta3 = sigma ** 2

    # Illegal parameter values: report and return nothing.
    if (sigma < 0) or (beta2 <= beta1 + 1):
        print('Illegal parameter values passed to pearscdf! '
              '(sigma: %s  beta1: %s  beta2: %s )' % (sigma, beta1, beta2))
        return

    # Classify the distribution and find the roots of c0 + c1*x + c2*x^2
    # (coefficients are left unnormalised here; see `denom` below).
    c0 = (4 * beta2 - 3 * beta1)
    c1 = skew * (beta2 + 3)
    c2 = (2 * beta2 - 3 * beta1 - 6)

    if c1 == 0:     # symmetric dist'ns
        if beta2 == 3:
            dtype = 0
            a1 = 0
            a2 = 0
        else:
            if beta2 < 3:
                dtype = 2
            elif beta2 > 3:
                dtype = 7
            a1 = -sqrt(abs(c0 / c2))
            a2 = -a1    # symmetric roots
    elif c2 == 0:   # kurt = 3 + 1.5*skew^2
        dtype = 3
        a1 = -c0 / c1   # single root
        a2 = a1
    else:
        kappa = c1 ** 2 / (4 * c0 * c2)
        if kappa < 0:
            dtype = 1
        elif kappa < 1 - finfo(float64).eps:
            dtype = 4
        elif kappa <= 1 + finfo(float64).eps:
            dtype = 5
        else:
            dtype = 6
        # Solve the quadratic for general roots a1 and a2 and sort by their real parts
        disc = c1 ** 2 - 4 * c0 * c2
        if disc < 0:
            tmp = -(c1 + sign(c1) * cmath.sqrt(disc)) / 2
        else:
            tmp = -(c1 + sign(c1) * sqrt(disc)) / 2
        a1 = tmp / c2
        a2 = c0 / tmp
        if (real(a1) > real(a2)):
            a1, a2 = a2, a1

    denom = (10 * beta2 - 12 * beta1 - 18)
    if abs(denom) > sqrt(finfo(double).tiny):
        c0 = c0 / denom
        c1 = c1 / denom
        c2 = c2 / denom
    else:
        # beta2 = 1.8 + 1.2*beta1, and c0, c1, and c2 -> Inf. But a1 and a2 are
        # still finite.
        dtype = 1   # this should have happened already anyway

    if method == 'MCS':
        dtype = 8

    # Evaluate the probability on the standardised (zero mean, unit variance) scale
    if dtype == 0:
        # normal: standard support (-Inf,Inf)
        m1 = 0
        m2 = 1
        p = norm.cdf(X[1], m1, m2) - norm.cdf(X[0], m1, m2)
        lo = norm.ppf(tail_lo, mu, sigma)
        hi = norm.ppf(tail_hi, mu, sigma)
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 1:
        # four-parameter beta: standard support (a1,a2)
        if abs(denom) > sqrt(finfo(double).tiny):
            m1 = (c1 + a1) / (c2 * (a2 - a1))
            m2 = -(c1 + a2) / (c2 * (a2 - a1))
        else:
            # c1 and c2 -> Inf, but c1/c2 has finite limit
            m1 = c1 / (c2 * (a2 - a1))
            m2 = -c1 / (c2 * (a2 - a1))
        X = (X - a1) / (a2 - a1)    # Transform to 0-1 interval
        alph = m1 + 1
        beta = m2 + 1
        # keep the beta density bounded at both ends
        if alph < 1.001 and beta < 1.001:
            alph = 1.001
            beta = 1.001
        mode = (alph - 1) / (alph + beta - 2)
        # both extreme-mode cases apply the same adjustment
        if mode < 0.1 or mode > 0.9:
            if alph > beta:
                alph = max(2.0, alph)
                beta = (alph - 1) / 0.9 - alph + 2
            elif beta > alph:
                beta = max(2.0, beta)
                alph = (0.1 * (beta - 2) + 1) / (1 - 0.1)
        p = stats.beta.cdf(X[1], alph, beta) - stats.beta.cdf(X[0], alph, beta)
        lo = a1 * sigma + mu
        hi = a2 * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 2:
        # symmetric four-parameter beta: standard support (-a1,a1)
        m = (c1 + a1) / (c2 * 2 * abs(a1))
        m1 = m
        m2 = m
        X = (X - a1) / (2 * abs(a1))
        alph = m + 1
        beta = m + 1
        if alph < 1.01:
            alph = 1.01
            beta = 1.01
        p = stats.beta.cdf(X[1], alph, beta) - stats.beta.cdf(X[0], alph, beta)
        lo = a1 * sigma + mu
        hi = a2 * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 3:
        # three-parameter gamma: standard support (a1,Inf) or (-Inf,a1)
        m = (c0 / c1 - c1) / c1
        m1 = m
        m2 = m
        X = (X - a1) / c1
        # The third positional argument of scipy's gamma.cdf is `loc`, so the
        # unit scale has to be passed by keyword (as the ppf calls below do).
        p = gamma.cdf(X[1], m + 1, scale=1) - gamma.cdf(X[0], m + 1, scale=1)
        lo = (gamma.ppf(tail_lo, m + 1, scale=1) * c1 + a1) * sigma + mu
        hi = (gamma.ppf(tail_hi, m + 1, scale=1) * c1 + a1) * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 4:
        # Pearson IV is not a transformation of a standard distribution: density
        # proportional to (1+((x-lambda)/a)^2)^(-m) * exp(-nu*arctan((x-lambda)/a)),
        # standard support (-Inf,Inf)
        X = X * sigma + mu  # pearson4cdf works on the original scale
        r = 6 * (beta2 - beta1 - 1) / (2 * beta2 - 3 * beta1 - 6)
        m = 1 + r / 2
        nu = -r * (r - 2) * skew / sqrt(16 * (r - 1) - beta1 * (r - 2) ** 2)
        a = sqrt(beta3 * (16 * (r - 1) - beta1 * (r - 2) ** 2)) / 4
        _lambda = mu - ((r - 2) * skew * sigma) / 4     # gives zero mean
        m1 = m
        m2 = nu
        if cdist == 1:
            p = 1 - pearson4cdf(X[0], m, nu, a, _lambda, mu, sigma)
        elif cdist == 2:
            p = pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma)
        elif cdist == 3:
            p = pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma) - pearson4cdf(X[0], m, nu, a, _lambda, mu, sigma)
        lo = norm.ppf(tail_lo, mu, sigma)
        hi = norm.ppf(tail_hi, mu, sigma)
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 5:
        # inverse gamma location-scale: standard support (-C1,Inf) or
        # (-Inf,-C1)
        C1 = c1 / (2 * c2)
        X = -((c1 - C1) / c2) / (X + C1)
        m1 = c2
        m2 = 0
        p = gamma.cdf(X[1], 1. / c2 - 1, scale=1) - gamma.cdf(X[0], 1. / c2 - 1, scale=1)
        lo = (-((c1 - C1) / c2) / gamma.ppf(tail_lo, 1 / c2 - 1, scale=1) - C1) * sigma + mu
        hi = (-((c1 - C1) / c2) / gamma.ppf(tail_hi, 1 / c2 - 1, scale=1) - C1) * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 6:
        # F location-scale: standard support (a2,Inf) or (-Inf,a1)
        m1 = (a1 + c1) / (c2 * (a2 - a1))
        m2 = -(a2 + c1) / (c2 * (a2 - a1))
        # a1 and a2 have the same sign, and they've been sorted so a1 < a2
        # NOTE(review): `fcdf` is not defined in this function; confirm a
        # module-level helper exists (scipy's equivalent is `f.cdf`).
        if a2 < 0:
            nu1 = 2 * (m2 + 1)
            nu2 = -2 * (m1 + m2 + 1)
            X = (X - a2) / (a2 - a1) * (nu2 / nu1)
            p = fcdf(X[1], nu1, nu2) - fcdf(X[0], nu1, nu2)
            lo = (f.ppf(tail_lo, nu1, nu2) + a2) * sigma + mu
            hi = (f.ppf(tail_hi, nu1, nu2) + a2) * sigma + mu
            Inv1 = norm.ppf(p, 0, 1)
        else:   # 0 < a1
            nu1 = 2 * (m1 + 1)
            nu2 = -2 * (m1 + m2 + 1)
            X = (X - a1) / (a1 - a2) * (nu2 / nu1)
            p = -fcdf(X[1], nu1, nu2) + fcdf(X[0], nu1, nu2)
            hi = (-f.ppf(tail_lo, nu1, nu2) + a1) * sigma + mu
            lo = (-f.ppf(tail_hi, nu1, nu2) + a1) * sigma + mu
            Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 7:
        # t location-scale: standard support (-Inf,Inf)
        nu = 1. / c2 - 1
        X = X / sqrt(c0 / (1 - c2))
        m1 = nu
        m2 = 0
        p = t.cdf(X[1], nu) - t.cdf(X[0], nu)
        lo = t.ppf(tail_lo, nu) * sqrt(c0 / (1 - c2)) * sigma + mu
        hi = t.ppf(tail_hi, nu) * sqrt(c0 / (1 - c2)) * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
    else:
        print("ERROR: Unknown data type!")

    # For the non-beta types, moment-match a beta distribution on [lo, hi].
    if dtype != 1 and dtype != 2:
        mu_s = (mu - lo) / (hi - lo)
        sigma_s = sigma ** 2 / (hi - lo) ** 2
        alph = ((1 - mu_s) / sigma_s - 1 / mu_s) * mu_s ** 2
        beta = alph * (1 / mu_s - 1)

    # Cap very peaked approximations at Beta(70, 70) on a support that is
    # symmetric about the mean.
    # NOTE(review): the collapsed source does not show whether this cap sat
    # inside the `dtype != 1 and dtype != 2` block; it is applied to every
    # type here -- confirm against the original layout.
    if alph > 70 or beta > 70:
        alph = 70
        beta = 70
        lo = mu - 11.87434 * sigma
        hi = 2 * mu - lo

    return p, dtype, Inv1, m1, m2, a1, a2, alph, beta, lo, hi
def gamma_cdf(x, a, loc, b):
    """Gamma CDF at *x* that tolerates a degenerate shape or scale.

    Parameters
    ----------
    x : array-like or scalar
        Evaluation points.
    a : float
        Shape parameter.
    loc : float
        Location parameter.
    b : float
        Scale parameter.

    Returns
    -------
    numpy.ndarray or float
        ``gamma.cdf(x, a, loc, b)``; when ``a`` or ``b`` is zero an array of
        ones with the shape of *x* is returned instead.
    """
    if a == 0 or b == 0:
        # np.shape also accepts lists and plain scalars, unlike x.shape
        return np.ones(np.shape(x))
    return gamma.cdf(x, a, loc, b)
def get_ys(self, xs):
    """Evaluate the curve at *xs*.

    The curve rises from the "Background" parameter towards 1 following a
    unit-scale gamma CDF with shape "Power", evaluated at ``xs * Slope``.
    """
    floor = self._get_param("Background")
    rate = self._get_param("Slope")
    shape = self._get_param("Power")
    rise = gamma.cdf(xs * rate, shape)
    return floor + (1.0 - floor) * rise
def mb_cdf(x, kT, alpha=1.5):
    """Return the CDF at *x* of a gamma distribution with shape *alpha* and scale *kT*.

    (Presumably the Maxwell-Boltzmann energy distribution, given the name
    and the default shape of 1.5 -- confirm with the caller.)
    """
    distribution = gamma(alpha, scale=kT)
    return distribution.cdf(x)
# An action earns its own value as reward when it does not exceed tau, else 0.
within_tau = actions <= tau
rewardsW = np.zeros(actions.size)
rewardsW[within_tau] = actions[within_tau]

discrete_within_tau = discreteActions <= tau
rewardsDiscrete = np.zeros(actions.size)
rewardsDiscrete[discrete_within_tau] = discreteActions[discrete_within_tau]
print(rewardsW)
#plt.scatter(actions,rewardsW)
#plt.show()

# Find real Maximum: plot the gamma density and x * (1 - CDF(x))
x = np.linspace(0, maxPrice, 1000)
a = gamma.pdf(x, shape, scale=scale)
survival = 1 - gamma.cdf(x, a=shape, scale=scale)
p = x * survival
plt.plot(x, a)
plt.plot(x, p)
plt.show()

# Fit a GP regression (RBF kernel) of reward on action and plot it
x = actions.reshape(-1, 1)
y = rewardsW.reshape(-1, 1)
kernel = GPy.kern.RBF(input_dim=1, variance=1, lengthscale=0.01)
gp = GPy.models.GPRegression(x, y, kernel)
gp.optimize_restarts(num_restarts=8, verbose=False)
print(gp)
fig = gp.plot()
GPy.plotting.show(fig)
def spicalc(data):
    """Compute Standardized Precipitation Index values for a 1-D series.

    NaN entries are ignored for the fit and stay NaN in the result. When
    the series holds 10 or fewer real values the result is all NaN.

    The non-zero values are fitted with a gamma distribution (closed-form
    shape/scale estimate), zeros are handled through the mixed CDF
    ``H(x) = q + (1 - q) * G(x)`` with ``q`` the fraction of zeros, and the
    CDF is mapped to a z-score with a rational approximation of the inverse
    normal CDF. If any CDF value equals exactly 1 (e.g. an always-dry
    series) zeros are returned for the real values.

    Parameters
    ----------
    data : numpy.ndarray
        Input series; may contain NaN.

    Returns
    -------
    numpy.ndarray
        Array of ``len(data)`` with the SPI at every non-NaN position.
    """
    import numpy as np
    from scipy.stats import gamma

    is_real = ~np.isnan(data)
    tmp = data[is_real]
    spireturn = np.full(len(data), np.nan)

    # not enough real data to fit a distribution
    if len(tmp) <= 10:
        return spireturn

    wet = tmp > 0.0
    dry = tmp == 0.0
    tmpnonz = tmp[wet]

    if len(tmpnonz) > 1:
        # more than one non-zero value is needed, otherwise the log-based
        # estimate below breaks down
        mean_wet = np.mean(tmpnonz)
        A = np.log(mean_wet) - (np.sum(np.log(tmpnonz)) / len(tmpnonz))
        shp = (1.0 / (4 * A)) * (1 + ((1 + ((4 * A) / 3)) ** 0.5))
        scl = mean_wet / shp
        gam = gamma.cdf(tmpnonz, shp, scale=scl)
    else:
        # zero or one non-zero value: use its relative frequency instead
        gam = len(tmpnonz) / len(tmp)

    numzero = len(tmp[dry])
    if numzero > 0:
        # mixed distribution H(x) = q + (1 - q) * G(x)
        q = numzero / len(tmp)
        gcdf = np.zeros(len(tmp))
        gcdf[wet] = q + (1 - q) * gam
        gcdf[dry] = q
    else:
        gcdf = gam

    if len(gcdf[gcdf == 1.0]) != 0:
        # degenerate CDF (always dry): dry is "normal", so SPI is 0
        s = np.zeros(len(gcdf))
    else:
        # constants of the rational approximation
        c0, c1, c2 = 2.515517, 0.802853, 0.010328
        d1, d2, d3 = 1.432788, 0.189269, 0.001308

        lower_half = gcdf <= 0.5
        t = np.where(lower_half,
                     (np.log(1 / (gcdf ** 2))) ** 0.5,
                     (np.log(1 / ((1.0 - gcdf) ** 2)) ** 0.5))
        numerator = c0 + c1 * t + c2 * (t ** 2)
        denominator = 1 + d1 * t + d2 * (t ** 2) + d3 * (t ** 3)
        ztmp = t - (numerator / denominator)
        s = np.where(lower_half, -1 * ztmp, ztmp)

    spireturn[is_real] = s
    return spireturn
# Code from common.get_data import getData import numpy as np from math import log as ln from scipy.stats import gamma # Gamma distribution from scipy.special import psi # Digamma function from scipy.optimize import fsolve x = None f = lambda u, alpha, beta: gamma.pdf(u, alpha, scale = 1 / beta) f_X = lambda u: f(u, alpha, beta) cdf = lambda u, alpha, beta: gamma.cdf(u, alpha, scale = 1 / beta) k_underline = range(k) n_underline = range(n) import warnings warnings.filterwarnings('error') from common.tests import * from common.BIC import BIC import matplotlib.pyplot as plt cursor = getData(tableName, startDateTime, n, precision) for row in cursor:
def compile(alphabet, words, nonwords):
    """Build the JS loader and the packed model payload for the word classifier.

    Steps, in order:

    1. Enumerate every (state, symbol) transition for states of up to
       ``args.max_state_size`` symbols; ``None`` marks end-of-string.
    2. Count transitions in *words* and turn them into
       ``log(state_count / transition_count)`` (``inf`` when unseen).
    3. Fit a gamma distribution to the finite values, map all values through
       its CDF and quantise them to ``args.transition_bits`` bits; the
       all-ones code is reserved for CDF value 1 (unseen transitions).
    4. Pack the codes big-endian into a byte buffer.
    5. Reconstruct the de-quantised values, fit gamma distributions to the
       summed values of *words* and *nonwords* (plotting both fits), and
       embed all parameters in the generated JavaScript.

    Reads module-level ``args``, ``tqdm``, ``plt`` and ``Path``.

    Returns
    -------
    tuple
        ``(code, data, is_gzipped)``: the encoded JS bootstrap, the payload
        (packed table followed by the encoded JS classifier, gzip-compressed
        when ``args.gzip`` is set and that is smaller), and whether the
        payload was gzipped.
    """
    print(' Generating all possible transitions...')
    from itertools import product
    transitions = []
    for state_size in range(args.max_state_size + 1):
        transitions += product(product(alphabet, repeat = state_size), [*alphabet, None])

    def of(string):
        # yield (preceding state, symbol) pairs, then the end-of-string transition
        for i in range(len(string)):
            yield string[max(0, i - args.max_state_size):i], string[i]
        yield string[max(0, len(string) - args.max_state_size):], None

    from collections import Counter
    counts = Counter()
    for word in tqdm(words, ' Counting transitions', leave = True):
        for state, symbol in of(word):
            counts[state, symbol] += 1
    state_counts = Counter()
    for state, symbol in tqdm(counts, ' Counting states', leave = True):
        state_counts[state] += counts[state, symbol]

    import numpy as np
    logprobs = np.empty(len(transitions))
    for i, (state, symbol) in enumerate(tqdm(transitions, ' Computing conditional transition probabilities', leave = True)):
        try:
            logprobs[i] = np.log(state_counts[state] / counts[state, symbol])
        except ZeroDivisionError:
            # transition never observed
            logprobs[i] = np.inf

    print(' Fitting flattening distribution...')
    from scipy.stats import gamma
    params = gamma.fit(logprobs[logprobs != np.inf])
    print(' Flattening...')
    logprobs = gamma.cdf(logprobs, *params)
    lower_bound = np.min(logprobs)
    upper_bound = np.max(logprobs[logprobs != 1])

    # the highest code is reserved for CDF value 1; the rest span [lower, upper]
    reserved_code = 2 ** args.transition_bits - 1
    new_logprobs = np.empty(len(logprobs), int)
    for i, logprob in enumerate(tqdm(logprobs, ' Discretizing', leave = True)):
        if logprob == 1:
            new_logprobs[i] = reserved_code
        else:
            new_logprobs[i] = round((logprob - lower_bound) * ((2 ** args.transition_bits - 2) / (upper_bound - lower_bound)))
    logprobs = new_logprobs

    data = bytearray()
    bit_buffer = 0
    bit_buffer_size = 0
    for logprob in tqdm(logprobs, ' Packing', leave = True):
        bit_buffer = bit_buffer << args.transition_bits | int(logprob)
        bit_buffer_size += args.transition_bits
        # flush whenever the buffer holds a whole number of bytes
        if bit_buffer_size % 8 == 0:
            data += bit_buffer.to_bytes(bit_buffer_size // 8, 'big')
            bit_buffer = 0
            bit_buffer_size = 0
    # pad the tail with zero codes until it is byte-aligned
    while bit_buffer_size % 8 != 0:
        bit_buffer = bit_buffer << args.transition_bits
        bit_buffer_size += args.transition_bits
    data += bit_buffer.to_bytes(bit_buffer_size // 8, 'big')

    # reproduce what the JS side will decode, so the fits below match it
    old_logprobs = np.empty(len(logprobs))
    for i, logprob in enumerate(tqdm(logprobs, ' Undiscretizing...', leave = True)):
        if logprob == reserved_code:
            old_logprobs[i] = 1
        else:
            old_logprobs[i] = lower_bound + logprob * ((upper_bound - lower_bound) / (2 ** args.transition_bits - 2))
    print(' Unflattening...')
    old_logprobs = gamma.ppf(old_logprobs, *params)
    old_logprobs = dict(zip(transitions, old_logprobs))

    def params_of(strings):
        # fit (and plot) a gamma distribution to the summed values of each string
        strings_logprobs = np.empty(len(strings))
        for i, string in enumerate(strings):
            strings_logprobs[i] = sum(old_logprobs[state, symbol] for state, symbol in of(string))
        finite = strings_logprobs[strings_logprobs != np.inf]
        strings_params = gamma.fit(finite)
        # `normed` was removed from Matplotlib's hist; `density` is its replacement
        _, bins, _ = plt.hist(finite, 500, histtype = 'step', density = True)
        plt.plot(bins, gamma.pdf(bins, *strings_params))
        return strings_params

    print(' Fitting words distribution...')
    words_params = params_of(words)
    print(' Fitting nonwords distribution...')
    nonwords_params = params_of(nonwords)

    def minify(code):
        # run the JS through uglify-js when requested; otherwise pass it through
        if args.minify:
            import subprocess
            p = subprocess.run([str(Path(__file__).parent / 'node_modules/uglify-js/bin/uglifyjs'),
                '--screw-ie8',
                '--mangle', 'sort,toplevel',
                '--compress',
                '--bare-returns',
            ], input = code.encode(), stdout = subprocess.PIPE, stderr = subprocess.PIPE)
            if p.returncode != 0:
                import sys
                sys.stderr.buffer.write(p.stderr)
                p.check_returncode()
            code = p.stdout.decode()
        return code

    print(' Generating JS code...')
    # bootstrap: the classifier source starts right after the packed table
    code = minify(
        r''' exports.init = function(buffer) { exports.test = (new Function('buffer', buffer.utf8Slice('''
        + str(len(data))
        + r''')))(buffer); }; '''
    ).encode()

    # The JS literals below are split across adjacent raw strings purely for
    # line length; their concatenation is unchanged.
    data += minify(
        r''' var abs = Math.abs; var min = Math.min; var max = Math.max; var alphabet = [ '''
        + r''' '''.join('"' + symbol + '",' for symbol in alphabet)
        + (
            r''' ]; var of; (function() { '''
            r'''function fold(string) { string = Array.from(string); '''
            r'''for (var i = alphabet.length - 1; alphabet[i].length > 1; --i) { '''
            r'''for (var j = 0; j <= string.length - alphabet[i].length; ++j) { '''
            r'''if (string.slice(j, j + alphabet[i].length).join('') == alphabet[i]) { '''
            r'''string.splice(j, alphabet[i].length, alphabet[i]); } } } return string; } '''
            r'''of = function(string) { string = fold(string); var ofString = []; '''
            r'''for (var i = 0; i < string.length; ++i) { ofString.push([string.slice(max(0, i - '''
        )
        + str(args.max_state_size)
        + r'''), i), string[i]]); } ofString.push([string.slice(max(0, string.length - '''
        + str(args.max_state_size)
        + (
            r''')), null]); return ofString; }; })(); var all; (function() { '''
            r'''function product(xs, ys) { var result = []; '''
            r'''for (var i = 0; i < xs.length; ++i) { for (var j = 0; j < ys.length; ++j) { result.push([xs[i], ys[j]]); } } '''
            r'''return result; } '''
            r'''function power(a, k) { if (k == 0) { return [[]]; } var result = []; '''
            r'''for (var i = 0; i < a.length; ++i) { var b = power(a, k - 1); '''
            r'''for (var j = 0; j < b.length; ++j) { result.push([a[i]].concat(b[j])); } } return result; } '''
            r'''all = []; for (var stateSize = 0; stateSize <= '''
        )
        + str(args.max_state_size)
        + (
            r'''; ++stateSize) { all = all.concat(product(power(alphabet, stateSize), alphabet.concat([null]))); } })(); '''
            r'''var gammaPdf, gammaPpf; (function() { '''
            r'''var pow = Math.pow; var exp = Math.exp; var log = Math.log; var sqrt = Math.sqrt; '''
            r'''var cof = [ 76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5, ]; '''
            r'''function ln(x) { var j = 0; var ser = 1.000000000190015; var xx, y, tmp; '''
            r'''tmp = (y = xx = x) + 5.5; tmp -= (xx + 0.5) * log(tmp); '''
            r'''for (; j < 6; j++) ser += cof[j] / ++y; return log(2.5066282746310005 * ser / xx) - tmp; } '''
            r'''gammaPdf = function(x, a) { if (x < 0) return 0; if (x === 0 && a === 1) return 1; '''
            r'''return exp((a - 1) * log(x) - x - ln(a)); }; '''
            r'''function lowReg(a, x) { var aln = ln(a); var ap = a; var sum = 1 / a; var del = sum; '''
            r'''var b = x + 1 - a; var c = 1 / 1.0e-30; var d = 1 / b; var h = d; var i = 1; '''
            r'''var ITMAX = -~(log((a >= 1) ? a : 1 / a) * 8.5 + a * 0.4 + 17); var an, endval; '''
            r'''if (x < 0 || a <= 0) { return NaN; } else if (x < a + 1) { '''
            r'''for (; i <= ITMAX; i++) { sum += del *= x / ++ap; } return sum * exp(-x + a * log(x) - aln); } '''
            r'''for (; i <= ITMAX; i++) { an = -i * (i - a); b += 2; d = an * d + b; c = b + an / c; d = 1 / d; h *= d * c; } '''
            r'''return 1 - h * exp(-x + a * log(x) - aln); } '''
            r'''gammaPpf = function(p, a) { var j = 0; var a1 = a - 1; var EPS = 1e-8; var gln = ln(a); '''
            r'''var x, err, t, u, pp, lna1, afac; '''
            r'''if (p > 1) return NaN; if (p == 1) return Infinity; if (p < 0) return NaN; if (p == 0) return 0; '''
            r'''if (a > 1) { lna1 = log(a1); afac = exp(a1 * (lna1 - 1) - gln); pp = (p < 0.5) ? p : 1 - p; '''
            r'''t = sqrt(-2 * log(pp)); x = (2.30753 + t * 0.27061) / (1 + t * (0.99229 + t * 0.04481)) - t; '''
            r'''if (p < 0.5) x = -x; x = max(1e-3, a * pow(1 - 1 / (9 * a) - x / (3 * sqrt(a)), 3)); } '''
            r'''else { t = 1 - a * (0.253 + a * 0.12); if (p < t) x = pow(p / t, 1 / a); else x = 1 - log(1 - (p - t) / (1 - t)); } '''
            r'''for(; j < 12; j++) { if (x <= 0) return 0; err = lowReg(a, x) - p; '''
            r'''if (a > 1) t = afac * exp(-(x - a1) + a1 * (log(x) - lna1)); else t = exp(-x + a1 * log(x) - gln); '''
            r'''u = err / t; x -= (t = u / (1 - 0.5 * min(1, u * ((a - 1) / x - 1)))); '''
            r'''if (x <= 0) x = 0.5 * (x + t); if (abs(t) < EPS * x) break; } return x; }; })(); '''
            r'''var logprobs = {}; var bitBuffer = 0, bitBufferSize = 0; var bufferOffset = 0; '''
            r'''for (var i = 0; i < all.length; ++i) { while (bitBufferSize < '''
        )
        + str(args.transition_bits)
        + (
            r''') { bitBuffer = bitBuffer << 8 | buffer.readUInt8(bufferOffset++); bitBufferSize += 8; } '''
            r'''var logprob = bitBuffer >> (bitBufferSize - '''
        )
        + str(args.transition_bits)
        + r''') & '''
        + hex(2 ** args.transition_bits - 1)
        + r'''; bitBufferSize -= '''
        + str(args.transition_bits)
        + r'''; if (logprob == '''
        + str(2 ** args.transition_bits - 1)
        + r''') { logprob = 1; } else { logprob = '''
        + str(lower_bound)
        + r''' + logprob * '''
        + str((upper_bound - lower_bound) / (2 ** args.transition_bits - 2))
        + r'''; } logprob = '''
        + str(params[1])
        + r''' + gammaPpf(logprob, '''
        + str(params[0])
        + r''') * '''
        + str(params[2])
        + (
            r'''; logprobs[all[i]] = logprob; } '''
            r'''return function(string) { var stringLogprob = 0; var ofString = of(string); '''
            r'''for (var i = 0; i < ofString.length; ++i) { stringLogprob += logprobs[ofString[i]]; } '''
            r'''if (stringLogprob == Infinity) { return false; } '''
            r'''var wordsDensity = gammaPdf((stringLogprob - '''
        )
        + str(words_params[1])
        + r''') / '''
        + str(words_params[2])
        + r''', '''
        + str(words_params[0])
        + r''') / '''
        + str(words_params[2])
        + r'''; var nonwordsDensity = gammaPdf((stringLogprob - '''
        + str(nonwords_params[1])
        + r''') / '''
        + str(nonwords_params[2])
        + r''', '''
        + str(nonwords_params[0])
        + r''') / '''
        + str(nonwords_params[2])
        + (
            r'''; if (wordsDensity > nonwordsDensity) { return true; } '''
            r'''if (wordsDensity < nonwordsDensity) { return false; } '''
            r'''return Math.random() >= 0.5; }; '''
        )
    ).encode()

    data, is_gzipped = bytes(data), False
    if args.gzip:
        import gzip
        print(' Gzipping...')
        gzipped_data = gzip.compress(data)
        # only keep the compressed form when it actually saves space
        if len(gzipped_data) < len(data):
            data, is_gzipped = gzipped_data, True
    return code, data, is_gzipped
n = 200 # Size of window precision = 3 # imageIndex = 0 binWidth = 50 # Code from common.get_data import getData import numpy as np from scipy.stats import gamma # Gamma distribution cdf = lambda u, p, alpha, beta: sum(p[i] * gamma.cdf(u, alpha[i], scale = 1 / beta[i]) for i in range(len(p))) import warnings warnings.filterwarnings('error') import matplotlib as mpl mpl.use('pgf') mpl.rcParams.update({ 'pgf.texsystem': 'xelatex', 'pgf.preamble': [r'\usepackage{unicode-math}'], 'text.usetex': True, 'text.latex.unicode': True, 'font.family': 'PT Serif', 'font.size': 14, }) import matplotlib.pyplot as plt
def target(self, vector,seed):
    """Return the negated fitness of the strategy encoded in *vector*.

    *vector* is read in triples ``(weight, shape, scale)`` that define a
    weighted sum of gamma CDFs on a fixed grid; its last entry, rounded,
    is the ``sense`` flag. The result accumulates log2 growth over the
    intervals between consecutive entries of ``self.trajectory`` and is
    divided by the last trajectory entry.

    Returns 1. as soon as a growth term is exactly 0, 2. when the fitness
    is NaN, and ``-fitness`` otherwise.

    Reads ``self.noise`` (shuffled in place), ``self.trajectory``,
    ``self.trajectoryX``, ``self.env``, ``self.gamma1Env``,
    ``self.arrayCost`` and ``self.sud``.
    """
    # NOTE(review): this seeds whatever `random` is bound to in this module,
    # while the shuffle below uses np.random -- confirm they share a generator.
    random.seed(100*seed+0)
    x = np.linspace(0.01, 10000., num=100) # values for x-axis
    d = np.zeros(100)
    w = 0
    # weighted sum of gamma CDFs; the last element of vector is not part of a triple
    for jj in range(0,len(vector)-1,3):
        d += vector[jj]*gamma.cdf(x, vector[jj+1], loc=0, scale=vector[jj+2]) # probability distribution
        w += vector[jj]
    # differences of the CDF give the probability mass per grid interval
    d = np.diff(np.concatenate([[0],d]))
    sense = np.round(vector[-1])
    timePointAll = d/w
    timePoint = np.copy(timePointAll)
    currEnv = 1
    sumT = 0
    prevchange = 0
    np.random.shuffle(self.noise)
    for i,change in enumerate(self.trajectory):
        if currEnv == 0:
            env = self.env[currEnv] + self.noise[i]
            temp = np.copy(timePointAll)
        else:
            env = self.env[currEnv] - self.noise[i]
            a,b = self.gamma1Env[:,env]
            temp = np.diff(np.concatenate([[0],gamma.cdf(x, a, loc=0, scale=b)]))# probability distribution
        # pick the cost column for this environment according to the sense flag
        if sense == 1:
            opt = self.arrayCost[1][:,env]
        else:
            opt = self.arrayCost[0][:,env]
        # length of the interval since the previous change
        inter = change-prevchange
        #print "1",i,currEnv,env,inter,change
        prevchange = change
        if sense == 0 or self.sud == 0:
            # only grid points with opt > -1 contribute
            growth = np.sum(timePoint[opt>-1]*2**opt[opt>-1])
            if growth == 0: return 1.
            sumT += 1.*inter*np.log2(growth)
        else:
            t2 = temp
            #First see who grows
            growth = np.sum(timePoint[opt>-1]*2**opt[opt>-1])
            if growth == 0: return 1.
            #Now switch. Fast changes
            # one step with the own distribution, the remaining inter-1 steps with t2
            sumT += 1.*np.log2(growth)
            sumT += 1.*(inter-1)*np.log2(np.sum(t2[opt>-1]*2**opt[opt>-1]))
            #print 1.*np.log(growth),1.*(inter-1)*np.log(np.sum(t2 + t2 * opt))
        currEnv = self.trajectoryX[change]
        #print "2",i,currEnv,env,inter,change
    fitness = sumT/self.trajectory[-1]#np.exp(sumT/self.trajectory[-1])-1.
    #print fitness
    # the smoothness penalty is disabled (`if 0`); the branch is kept for reference
    if 0:
        penalty = 0.1*np.sum(np.abs(np.diff(timePointAll))>0.01) #0.1 for each sudden change in concentration
        fitness = fitness-penalty
    else:
        fitness = fitness
    if np.isnan(fitness): return 2.
    else: return -fitness
from scipy.stats import gamma

# CDF at x = 4 of a gamma distribution with shape 6, location 0 and scale 3
print(gamma.cdf(4, a=6, loc=0, scale=3))
X = [12,15,8,13.5,25] Lambda = np.arange(0.01,1,0.01) prior_alpha = 1 prior_beta = 20 prior = Gamma(Lambda, prior_alpha, prior_beta) likelihood = Exponential(Lambda, X) posterior_alpha = prior_alpha + len(X) posterior_beta = prior_beta + np.sum(X) posterior = Gamma(Lambda, posterior_alpha, posterior_beta) PPTLT = GammaDist.cdf(1.0/10.0, posterior_alpha,scale=1.0/posterior_beta) print(PPTLT) if PLOT: f, axarr = plt.subplots(3, sharex=True) axarr[0].plot(Lambda,prior) axarr[0].set_title('Prior') axarr[1].plot(Lambda,likelihood) axarr[1].set_title('Likelihood') axarr[2].plot(Lambda, posterior) axarr[2].set_title('Posterior') plt.show() elif (EARTH_QUESTION): print('EARTH_QUESTION') Lambda = np.arange(0.01,1,0.01)