def example():
    xy_grid = make_xy_grid(-4.5, 3.5, 100)
    x = np.concatenate([norm(-1, 1.).rvs(400), norm(1, 0.3).rvs(100)])
    x = x.reshape([250, 2])
    kde, bandwidth = kde_fit_cv(x)
    pdf = kde_eval(kde, xy_grid)
    plot(xv, yv, pdf)
def test_cdf_normal_case(self):
    (g, m, s) = (0., 0., 1.)
    assert_almost_equal(pearson3(g).cdf([0., .25, .5, 1.]),
                        dist.norm().cdf([0., .25, .5, 1.]))
    assert_almost_equal(pearson3(g).pdf([0., .25, .5, 1.]),
                        dist.norm().pdf([0., .25, .5, 1.]))
    assert_almost_equal(pearson3(g).ppf([0.001, 0.25, 0.50, 0.999]),
                        dist.norm().ppf([0.001, 0.25, 0.50, 0.999]))
    assert_almost_equal(pearson3(g).sf([0., .25, .5, 1.]),
                        dist.norm().sf([0., .25, .5, 1.]))
    assert_almost_equal(pearson3(g).isf([0.001, 0.25, 0.50, 0.999]),
                        dist.norm().isf([0.001, 0.25, 0.50, 0.999]))
def testImportanceMixing(popsize=5000, forcedRefresh=0.0):
    import pylab
    distr1 = norm()
    distr2 = norm(loc=1.5)
    p1 = distr1.rvs(popsize)
    inds, np = importanceMixing(p1, distr1.pdf, distr2.pdf,
                                lambda: distr2.rvs()[0], forcedRefresh)
    reuse = [p1[i] for i in inds]
    p2 = reuse + np
    p2b = distr2.rvs(popsize)
    pylab.hist(array([p2, p2b]).T, 20, normed=1, histtype='bar')
    pylab.show()
def test_max_likelihood():
    import pdb
    pdb.set_trace()
    x = [1., 2.]
    obs = gaussian_generator(mu=x[0], sigma=x[1])
    pdf = norm(x[0], x[1]).pdf
    func_to_minimize = lambda x: -get_log_likelihood(norm(x[0], x[1]).pdf, obs)
    solver = Solver(func_to_minimize, minimize)
    x0 = [0., 0.5]
    res = solver.solve(x0)
    print res
def init_distributions(pkey, kind='dpm', nrvs=25, tb=.65, force_normal=False):
    """ sample random parameter sets to explore global minima
    (called by Optimizer method __hop_around__())
    """
    loc, scale = get_theta_params(pkey, kind=kind)
    bounds = get_bounds(kind=kind)[pkey]
    lower = np.min(bounds)
    upper = np.max(bounds)

    normal_params = ['a', 'tr', 'v', 'vd', 'ssv', 'sso', 'xb', 'z', 'Beta']
    uniform_params = ['vi', 'BX', 'AX', 'PX', 'si']

    # init and freeze dist shape
    if pkey in normal_params:
        dist = norm(loc, scale)
    # elif pkey in gamma_params:
    #     dist = gamma(1.0, loc, scale)
    elif pkey in uniform_params:
        dist = uniform(loc, scale)

    # generate random variates
    rvinits = dist.rvs(nrvs)
    while rvinits.min() < lower:
        # apply lower limit
        ix = rvinits.argmin()
        rvinits[ix] = dist.rvs()
    while rvinits.max() > upper:
        # apply upper limit
        ix = rvinits.argmax()
        rvinits[ix] = dist.rvs()
    if pkey == 'tr':
        rvinits = np.abs(rvinits)
        rvinits[rvinits < lower] = lower
        rvinits[rvinits > upper] = upper
    return rvinits
def analy(self, N_o):
    ''' Analytical solution '''
    # --- portfolio loss distribution @ t = \tau via analytical formulas ---
    opt_val = lambda S: - 5. * bp.blsprice(S, self.K, self.rfr, self.T - self.tau, self.sigma, 'put') \
                        - 10. * bp.dnOutCall(S, self.K, self.rfr, self.T - self.tau, self.sigma, self.H)
    #ran1 = npr.multivariate_normal(np.zeros(self.D), np.eye(self.D), N_o)
    #ran1 = npr.standard_normal((N_o, self.D))
    ran1 = norm(loc=0, scale=1).ppf(lhs(self.D, samples=N_o))
    S1 = np.zeros((N_o, self.D))
    S1[:, :] = self.S0
    S1[:, :] = S1[:, :] * np.exp((self.mu - 0.5 * self.sigma**2) * self.tau
                                 + self.sigma * np.sqrt(self.tau) * ran1[:, :])

    t0 = time.time()
    ValueTau = np.zeros(N_o)
    for n in range(N_o):
        ValueTau[n] = np.sum(map(opt_val, S1[n, :]))
    print "%.2fs elapsed" % (time.time() - t0)

    L_analy = np.sort(self.Value0 - ValueTau)
    #L_analy = np.sort(-ValueTau + self.Value0 * np.exp(self.rfr*self.tau))
    var = scs.scoreatpercentile(L_analy, self.perc * 100.)
    eel = np.mean(np.maximum(L_analy - var, 0))
    return (var, eel)
def fit(self, X, y):
    X = numpy.array(X)
    self.initial_values = []
    self.transformed_values = []
    for axis in range(X.shape[1]):
        initial_values = X[:, axis] * (1 + 1e-6 * numpy.random.normal(size=len(X)))
        initial_values += 1e-8 * numpy.random.normal(size=len(X))
        indices = numpy.argsort(initial_values)
        initial_values = initial_values[indices]
        self.initial_values.append(initial_values)

        transformed = numpy.arange(len(X), dtype='float')
        # increase the distance between neighbours of different classes
        additions = numpy.abs(numpy.diff(y[indices]))
        additions = numpy.cumsum(additions)
        transformed[1:] += additions * self.scale
        transformed /= transformed[-1] / 2.
        transformed -= 1

        if self.like_normal:
            # converting to normal-like distributions
            transformed -= transformed[0]
            transformed /= transformed[-1] / 0.9
            transformed += 0.05
            transformed = norm().ppf(transformed)

        self.transformed_values.append(transformed)
    return self
def add_set_value_random_norm(self, variable, means, stdvs):
    """ Add a 'Set Value' macro command where the value is chosen from
    a random normal distribution.

    Parameters
    ----------
    variable: string
        An AnyScript variable or a list of AnyScript variables.
    means: int, float, numpy.ndarray
        The mean value of the random number
    stdvs:
        The standard deviation of the random variable

    Examples
    --------
    Set variable across different macros

    >>> seed(1)
    >>> mg = MonteCarloMacroGenerator(number_of_macros=5)
    >>> mg.add_set_value_random_norm('Main.Var', means=[1, 2, 4], stdvs=[0.1, 0.5, 2])
    >>> for line in mg.generate_macros(): pprint(line)
    ['classoperation Main.Var "Set Value" --value="{1,2,4}"']
    ['classoperation Main.Var "Set Value" --value="{0.979048215908,2.29190287213,-3.36989533908}"']
    ['classoperation Main.Var "Set Value" --value="{0.948229648476,1.47477555917,1.34701845466}"']
    ['classoperation Main.Var "Set Value" --value="{0.910823783045,1.80133318708,3.47655384811}"']
    ['classoperation Main.Var "Set Value" --value="{1.00974531575,1.8980227331,4.96468967866}"']
    """
    dist = distributions.norm(means, stdvs)
    self.add_set_value_random(variable, dist)
def induce_correlations(data, corrmat):
    """
    Induce a set of correlations on a column-wise dataset

    Parameters
    ----------
    data : 2d-array
        An m-by-n array where m is the number of samples and n is the
        number of independent variables, each column of the array
        corresponding to each variable
    corrmat : 2d-array
        An n-by-n array that defines the desired correlation coefficients
        (between -1 and 1). Note: the matrix must be symmetric and
        positive-definite in order to induce.

    Returns
    -------
    new_data : 2d-array
        An m-by-n array that has the desired correlations.
    """
    # Create a rank-matrix
    data_rank = np.vstack([rankdata(datai) for datai in data.T]).T

    # Generate van der Waerden scores
    data_rank_score = data_rank / (data_rank.shape[0] + 1.0)
    data_rank_score = norm(0, 1).ppf(data_rank_score)

    # Calculate the lower triangular matrix of the Cholesky decomposition
    # of the desired correlation matrix
    p = chol(corrmat)

    # Calculate the current correlations
    t = np.corrcoef(data_rank_score, rowvar=0)

    # Calculate the lower triangular matrix of the Cholesky decomposition
    # of the current correlation matrix
    q = chol(t)

    # Calculate the re-correlation matrix
    s = np.dot(p, np.linalg.inv(q))

    # Calculate the re-sampled matrix
    new_data = np.dot(data_rank_score, s.T)

    # Create the new rank matrix
    new_data_rank = np.vstack([rankdata(datai) for datai in new_data.T]).T

    # Sort the original data according to new_data_rank
    for i in range(data.shape[1]):
        vals, order = np.unique(
            np.hstack((data_rank[:, i], new_data_rank[:, i])),
            return_inverse=True)
        old_order = order[:new_data_rank.shape[0]]
        new_order = order[-new_data_rank.shape[0]:]
        tmp = data[np.argsort(old_order), i][new_order]
        data[:, i] = tmp[:]

    return data
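# --- usage sketch (not from the source): exercises induce_correlations above.
# It assumes that function is in scope, with `chol` giving the lower-triangular
# Cholesky factor and `rankdata` taken from scipy.stats in its module.
import numpy as np

rng = np.random.RandomState(0)
data = rng.uniform(size=(1000, 2))      # two independent columns of samples

corrmat = np.array([[1.0, 0.7],
                    [0.7, 1.0]])        # target correlation of 0.7

correlated = induce_correlations(data.copy(), corrmat)
print(np.corrcoef(correlated, rowvar=False))  # should be close to corrmat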
def test_box_normal_pdf():
    """ Testing normal distribution for box approach """
    vx = np.linspace(-vmax, vmax, 1000)
    f = box_to_pdf(data, vx)
    ftrue = norm(0, sigma).pdf(vx)
    error = relative_L2_error(f, ftrue, vx)
    assert_almost_equal(error, 0, decimal=2)
def test_normal_pdf():
    """ Testing normal distribution for point approach """
    x = np.linspace(-vmax, vmax, 1000)
    fapprox = data_to_pdf(data[:, 3], x)
    ftrue = norm(0, sigma).pdf(x)
    error = relative_L2_error(fapprox, ftrue, x)
    assert_almost_equal(error, 0, decimal=2)
def nbTL(self, node):
    probs = [1, 1]
    ## print(node.tag)
    ## print(node.attrib['position'])
    ## print(node.attrib['length'])
    ## print(node.attrib['size'])
    ## print(node.attrib['left'])
    ## print(node.attrib['width'])
    ## print(node.attrib['height'])
    ## print(node.attrib['top'])
    for label in range(2):
        if node.tag in self.mtagid[1]:
            probs[label] *= self.mtagid[label][node.tag]
        else:
            probs[label] *= 0
        if node.attrib['position'] == '0':
            probs[label] *= self.mpos[label][0]
        else:
            probs[label] *= self.mpos[label][1]
        normer = distributions.norm(self.mlen[label][0], self.mlen[label][1])
        probs[label] *= normer.pdf(int(node.attrib['length']))
        normer = distributions.norm(self.msize[label][0], self.msize[label][1])
        probs[label] *= normer.pdf(int(node.attrib['size']))**2
        normer = distributions.norm(self.mleft[label][0], self.mleft[label][1])
        probs[label] *= normer.pdf(int(node.attrib['left']))
        normer = distributions.norm(self.mwidth[label][0], self.mwidth[label][1])
        probs[label] *= normer.pdf(int(node.attrib['width']))
        #normer = distributions.norm(self.mheight[label][0], self.mheight[label][1])
        #probs[label] *= (normer.pdf(int(node.attrib['height'])))**0.5
    if probs[1] > probs[0]:
        #print(probs)
        return True
    else:
        return False
def generate_init(model, jj, ns, output_file, lognorm=False, best_reg=True):
    # select the parameters
    if best_reg:
        p_to_fit = [p for p in model.parameters if p.name[0] == 'k']
        num_ind = len(p_to_fit) * ns
        ini_val = lhs(len(p_to_fit), samples=ns)
        fname_new = 'stat-%d.pkl' % (jj)
        stat = pickle.load(open(fname_new, "rb"))
        means = stat[0]
        stdvs = stat[1]
        if lognorm:
            for ind in range(len(p_to_fit)):
                ini_val[:, ind] = norm(loc=means[ind], scale=stdvs[ind]).ppf(ini_val[:, ind])
        else:
            # First shift unit hypercube to be centered around origin
            # Then scale hypercube along each dimension by 2 stdevs
            # Finally shift hypercube to be centered around nominal values
            ini_val = means + 2 * stdvs * (ini_val - 0.5)
    else:
        p_to_fit = [p for p in model.parameters if p.name[0] == 'k']
        nominal_values = np.array([q.value for q in p_to_fit])
        log_nominal_values = np.log10(nominal_values)
        # latin hypercube sampling for picking a starting point
        num_ind = len(p_to_fit) * ns
        ini_val = lhs(len(p_to_fit), samples=ns)
        means = log_nominal_values
        stdvs = np.ones(len(p_to_fit))
        if lognorm:
            for ind in range(len(p_to_fit)):
                ini_val[:, ind] = norm(loc=means[ind], scale=stdvs[ind]).ppf(ini_val[:, ind])
        else:
            # First shift unit hypercube to be centered around origin
            # Then scale hypercube along each dimension by 2 stdevs
            # Finally shift hypercube to be centered around nominal values
            ini_val = means + 2 * stdvs * (ini_val - 0.5)
    np.save(output_file, ini_val)
def test_lstats_gaussian(self):
    "Try lstats on Gaussian"
    _nor = [0., 0.56418958, 0., 0.12260172, 0., 0.04366115, 0., 0.02184314,
            0., 0.0129635, 0., 0.00852962, 0., 0.00601389, 0., 0.00445558,
            0., 0.00342643, 0., 0.00271268]
    assert_almost_equal(dist.norm.lstats(20), _nor)
    assert_almost_equal(dist.norm.lstats(20, 0., 1.), _nor)
    _nor = [1., 1.69256875, 0., 0.12260172, 0., 0.04366115, 0., 0.02184314,
            0., 0.0129635, 0., 0.00852962, 0., 0.00601389, 0., 0.00445558,
            0., 0.00342643, 0., 0.00271268]
    assert_almost_equal(dist.norm(1., 3.).lstats(20), _nor)
    assert_almost_equal(dist.norm.lstats(20, 1., 3.), _nor)
def test_setvalue_random():
    np.random.seed(1)
    fdist = norm(2, [1, 1, 1])
    c = mc.SetValue_random("val", fdist)
    assert c.get_macro(0) == 'classoperation val "Set Value" --value="{2,2,2}"'
    mg = AnyMacro(c)
    macrolist = mg.create_macros_MonteCarlo(2)
    assert macrolist[0][0] == 'classoperation val "Set Value" --value="{2,2,2}"'
    assert (
        macrolist[1][0]
        == 'classoperation val "Set Value" --value="{1.79048215908,2.58380574427,-1.68494766954}"'
    )
def test_set_value_LHS(self):
    seed(1)
    normdist = norm([1, 3, 4], [0.1, 0.5, 1])
    mg = LatinHyperCubeMacroGenerator(number_of_macros=4)
    mg.add_set_value_LHS('Main.myvar1', normdist)
    mg.add_set_value_LHS('Main.myvar2', normdist)
    macros = mg.generate_macros()
    assert len(macros) == 4
    assert macros[0][0] == 'classoperation Main.myvar1 "Set Value" --value="{1,3,4}"'
    assert macros[0][1] == 'classoperation Main.myvar2 "Set Value" --value="{1,3,4}"'
    assert macros[1][0] == 'classoperation Main.myvar1 "Set Value" --value="{0.973478019766,2.64702266602,0.0441088571967}"'
    assert macros[1][1] == 'classoperation Main.myvar2 "Set Value" --value="{1.12273573036,2.17228489089,4.15538179788}"'
def get_noise_value_at(x, variance, noise_gen, val_min=0, val_max=1):
    """
    Returns the noise value for noise_gen for a given variance at x.

    The noise_gen is assumed to represent a [0, 1] hypercube and is smoothed
    by a gaussian distribution with variance variance. Note that the smoothing
    is hard-capped at a 3 sigma interval due to performance reasons.

    Parameters
    ----------
    x : list of real values
        The values of x. The ith entry represents the value of x in the ith
        dimension.
    variance : float
        The variance of the normal distribution to smooth the noise.
    noise_gen : ndarray
        The array representing the generated noise.
    val_min, val_max : float
        This is used to scale the actual maximum and minimum values to
        represent the same as otherwise values would not be comparable
        between variances.

    Returns
    -------
    x_value : float
        The value of the function at the point x.
    """
    x_value = 0
    prob_sum = 0
    gaussian = norm(scale=variance)
    dims = len(noise_gen.shape)
    points = len(noise_gen[0])
    closest_idx = _gen_closest_index(x, points)
    close_indices = _gen_close_indices(closest_idx,
                                       max(1, int(variance * 3 * points)),
                                       dims, points)
    for i in close_indices:
        dist = _calc_distance_grid(x, i, points)
        prob = gaussian.pdf(dist)
        prob_sum += gaussian.pdf(dist)
        x_value += prob * noise_gen[i]
    x_value /= prob_sum
    x_value = (x_value - val_min) / (val_max - val_min)
    return x_value
def MixedEstimator(abg_init, binnedTrain, dp_tol=1e-2):
    phis = binnedTrain.bins.keys()
    theta = binnedTrain.theta

    dp = dp_tol * 2.0
    abg = abg_init
    while dp > dp_tol:
        Tf = binnedTrain.getTf()
        xmin = FPMultiPhiSolver.calculate_xmin(Tf, abg)
        dx = FPMultiPhiSolver.calculate_dx(abg, xmin)
        dt = FPMultiPhiSolver.calculate_dt(dx, abg, xmin, factor=8.)

        S = FPMultiPhiSolver(theta, phis, dx, dt, Tf, xmin)

        Fs = S.solve(abg, visualize=False)
        Ss = S.transformSurvivorData(binnedTrain)
        Ls = Fs[:, :, -1] - Ss

        Nus = S.solveAdjoint(abg, Ls)
        dGdp = S.estimateParameterGradient(abg, Fs, Nus)

        from numpy.linalg.linalg import norm
        dG_normalized = dGdp / norm(dGdp)

        dp = FortetLineEstimator(binnedTrain, abg, dG_normalized, dp_tol)
        abg = abg - dp * dG_normalized

        print 'dG = ', dG_normalized
        print 'dp = ', dp
        print 'abg = (%.3g, %.3g, %.3g)' % (abg[0], abg[1], abg[2])
        print '-'

    return abg
def init_distributions(pkey, kind='dpm', mu=None, sigma=None, nrvs=25, tb=.65):
    """ sample random parameter sets to explore global minima
    (called by Optimizer method __hop_around__())
    """
    if mu is None:
        mu = {'a': .15, 'tr': .02, 'v': 1., 'ssv': -1., 'z': .1,
              'xb': 1., 'sso': .15, 'vi': .35, 'vd': .5}
    if sigma is None:
        sigma = {'a': .35, 'tr': .25, 'v': .5, 'ssv': .5, 'z': .05,
                 'xb': .5, 'sso': .01, 'vi': .4, 'vd': .5}

    normal_params = ['tr', 'v', 'vd', 'ssv', 'z', 'xb', 'sso']
    gamma_params = ['a', 'tr']
    uniform_params = ['vd', 'vi']
    if 'race' in kind:
        sigma['ssv'] = abs(mu['ssv'])

    bounds = get_bounds(kind=kind)[pkey]
    loc = mu[pkey]
    scale = sigma[pkey]

    # init and freeze dist shape
    if pkey in normal_params:
        dist = norm(loc, scale)
    elif pkey in gamma_params:
        dist = gamma(1.0, loc, scale)
    elif pkey in uniform_params:
        dist = uniform(loc, scale)

    # generate random variates
    rvinits = dist.rvs(nrvs)
    while rvinits.min() < bounds[0]:
        # apply lower limit
        ix = rvinits.argmin()
        rvinits[ix] = dist.rvs()
    while rvinits.max() > bounds[1]:
        # apply upper limit
        ix = rvinits.argmax()
        rvinits[ix] = dist.rvs()
    if pkey == 'tr':
        rvinits = np.abs(rvinits)
    return rvinits
def sample_pspace(model, param_list=None, bounds=None, samples=100, seed=None):
    """
    A DataFrame where each row represents a location in the parameter space,
    locations distributed to exercise the full range of values that each
    parameter can take on.

    This is useful for quick and dirty application of tests to a bunch of
    locations in the sample space. Kind-of a fuzz-testing for the model.

    Uses latin hypercube sampling, with random values within the sample bins.
    The LHS sampler shuffles the bins each time, so a subsequent call will
    yield a different sample from the parameter space.

    When a variable has both upper and lower bounds, use a uniform sample
    between those bounds.

    When a variable has only one bound, use an exponential distribution with
    the scale set to be the difference between the bound and the current
    model value (1 if they are the same).

    When the variable has neither bound, use a normal distribution centered
    on the current model value, with scale equal to the absolute value of the
    model value (1 if that magnitude is 0).

    Parameters
    ----------
    model: pysd.Model object

    param_list: None or list of strings
        The real names of parameters to include in the explored parameter
        space. If None, uses all of the constants in the model except
        TIME STEP, INITIAL TIME, etc.

    bounds: DataFrame, string filename, or None
        A range test matrix as used for bounds checking. If None, creates one
        from the model. These bounds can also place artificial limits on the
        parameter space you want to explore, even if the theoretical bounds
        on the variable are infinite.

    samples: int
        How many samples to include in the iterator?

    Returns
    -------
    lhs : pandas DataFrame
        distribution-weighted latin hypercube samples

    Note
    ----
    Executes the model by 1 time-step to get the current value of parameters.
    """
    if param_list is None:
        doc = model.doc()
        param_list = sorted(list(set(doc[doc['Type'] == 'constant']['Real Name']) -
                                 {'FINAL TIME', 'INITIAL TIME', 'TIME STEP'}))

    if isinstance(bounds, _pd.DataFrame):
        bounds = bounds.set_index('Real Name')
    elif bounds is None:
        bounds = create_bounds_test_matrix(model).set_index('Real Name')
    elif isinstance(bounds, str):
        if bounds.split('.')[-1] in ['xls', 'xlsx']:
            bounds = _pd.read_excel(bounds, sheetname='Bounds', index_col='Real Name')
        elif bounds.split('.')[-1] == 'csv':
            bounds = _pd.read_csv(bounds, index_col='Real Name')
        elif bounds.split('.')[-1] == 'tab':
            bounds = _pd.read_csv(bounds, sep='\t', index_col='Real Name')
        else:
            raise ValueError('Unknown file type: bounds')
    else:
        raise ValueError('Unknown type: bounds')

    if seed is not None:
        _np.random.seed(seed)

    unit_lhs = _pd.DataFrame(_pyDOE.lhs(n=len(param_list), samples=samples),
                             columns=param_list)  # raw latin hypercube sample

    res = model.run(return_timestamps=[model.components.initial_time()])
    lhs = _pd.DataFrame(index=unit_lhs.index)
    for param in param_list:
        lower, upper = bounds[['Min', 'Max']].loc[param]
        value = res[param].iloc[0]

        if lower == upper:
            lhs[param] = lower

        elif _np.isfinite(lower) and _np.isfinite(upper):  # np.isfinite(0)==True
            scale = upper - lower
            lhs[param] = _dist.uniform(lower, scale).ppf(unit_lhs[param])

        elif _np.isfinite(lower) and _np.isinf(upper):
            if lower == value:
                scale = 1
            else:
                scale = value - lower
            lhs[param] = _dist.expon(lower, scale).ppf(unit_lhs[param])

        elif _np.isinf(lower) and _np.isfinite(upper):  # np.isinf(-np.inf)==True
            if upper == value:
                scale = 1
            else:
                scale = upper - value
            lhs[param] = upper - _dist.expon(0, scale).ppf(unit_lhs[param])

        elif _np.isinf(lower) and _np.isinf(upper):  # np.isinf(-np.inf)==True
            if value == 0:
                scale = 1
            else:
                scale = abs(value)
            lhs[param] = _dist.norm(value, scale).ppf(unit_lhs[param])

        else:
            raise ValueError('Problem with lower: %s or upper: %s bounds'
                             % (lower, upper))

    return lhs
def mult(A, B, epsilon=0.25, delta=0.25, decompose=False):
    '''
    Matrix multiplication using Algorithm 1 (Page 5) without the use of the
    Fast Johnson-Lindenstrauss Transform (FJLT).

    Let C be the output of this function. Then:
        Pr(||AB-C||_F <= 2*epsilon*||A||_F * ||B||_F) >= 1 - delta.

    Time complexity of: O(b(1/e^2((n+g)(m+p)) + g(n(m+p)+lg(g)))), where:
        * e = epsilon
        * b = beta = O(lg(1/delta))
        * g = gamma = O(b + lg(b)) = O(lg(1/delta) + lg(lg(1/delta)))
    with
        * epsilon > 0
        * 0 < delta <= 1/2, since we require lglg(1/delta) >= 0 (lg = log base 2).

    decompose - if True, returns two matrices X, Y such that XY = AB, though
    X and Y don't take up as much space as A and B.
    '''
    assert epsilon > 0, f'epsilon must be greater than 0. Found epsilon={epsilon}'
    assert 0 < delta <= 1 / 2., f'delta must be in (0, 1/2]. Found delta={delta}'

    m, n = A.shape
    nn, p = B.shape
    assert n == nn, f'Dimension mismatch: cannot multiply {(m, n)} by {(nn, p)}.'

    # Step 1, Lines 1-2:
    # Compute lg(1/delta) tug-of-war matrices, S_i, using standard normal.
    N = norm(0, 1)
    beta = np.log2(1 / delta)
    S = epsilon * N.rvs(size=discretize((beta, 1 / epsilon**2, n)))

    # Step 2, Lines 3-4:
    # Compute lg(1/delta) x 2(lg(1/delta) + lg(lg(1/delta))) tug-of-war matrices,
    # Q_{i,j}, using standard normal.
    gamma = 2 * (beta + np.log2(beta))
    # If 1/epsilon^2 = 16, then epsilon = 1/4.
    Q = (1 / 4.) * N.rvs(size=discretize((beta, gamma, 16, p)))

    # Step 3a: Compute transposes of the matrices first.
    St = np.transpose(S, axes=(0, 2, 1))
    Qt = np.transpose(Q, axes=(0, 1, 3, 2))

    # Step 3, Line 5:
    # Compute SB and AS^t for all S.
    SB, ASt = S @ B, A @ St

    # Step 4, Line 6:
    # Compute A(BQ^t) and then X = A(BQ^t) for all Q.
    BQt = B @ Qt
    ABQt = A @ BQt
    X = ABQt

    # Step 5, Line 7:
    # Compute (SB)Q^t and then Xhat = (AS^t)(SBQ^t).
    SBQt = np.einsum('hjk,hikl->hijl', SB, Qt)
    Xhat = np.einsum('hjk,hikl->hijl', ASt, SBQt)

    # Step 6, Line 8:
    # Compute y_{i,j} = ||X_{i,j} - Xhat_{i,j}||_F^2.
    y = np.linalg.norm(X - Xhat, axis=(2, 3))**2

    # Step 7, Line 9:
    # Compute z_i = median of y_{i,j} over j.
    z = np.median(y, axis=1)

    # Step 8, Line 10:
    # Compute i^* = argmin z_i.
    i = np.argmin(z)

    return (ASt[i], SB[i]) if decompose else ASt[i] @ SB[i]
import csv
import pymbar
from pymbar import testsystems, MBAR, timeseries
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import copy
import os
import os.path
import optparse
import scipy
from optparse import OptionParser
from scipy.stats import distributions

normal1 = distributions.norm(3, 1.1)
normal2 = distributions.norm(7, 1)
uniform = distributions.uniform(0, 10)
gamma = distributions.gamma(6, 0.01)

x = np.linspace(0, 10, 100)
y1 = normal1.pdf(x) + normal2.pdf(x)
y2 = uniform.pdf(x)

plt.plot(x, y2)
plt.fill_between(x, 0, y2)
plt.title('Prior Distribution')
plt.ylim([0, 0.5])
plt.show()

plt.plot(x, y1 * y2)
from pyDOE import *
from scipy.stats.distributions import norm

design = lhs(4, samples=10)
means = [1, 2, 3, 4]
stdvs = [0.1, 0.5, 1, 0.25]
for i in range(0, 4):
    design[:, i] = norm(loc=means[i], scale=stdvs[i]).ppf(design[:, i])
def normals_pdf(mu, sigma, p, obs):
    return sum([p_ * norm(mu_, sigma_).pdf(obs)
                for mu_, sigma_, p_ in zip(mu, sigma, p)])
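# --- usage sketch (not from the source): evaluates normals_pdf above on a
# two-component Gaussian mixture; the component parameters and weights here
# are illustrative only.
import numpy as np
from scipy.stats.distributions import norm

mu = [0.0, 3.0]      # mixture: 0.7 * N(0, 1) + 0.3 * N(3, 0.5)
sigma = [1.0, 0.5]
p = [0.7, 0.3]

obs = np.linspace(-3.0, 5.0, 9)
print(normals_pdf(mu, sigma, p, obs))   # mixture density at each point in obs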
def main(): # set the mean and standard deviation of la and xi m_la, std_la = 10.0, 1.0 m_xi, std_xi = 1.0, 0.1 # construct objects representing normal distributions pdistrib_la = norm(loc=m_la, scale=std_la) pdistrib_xi = norm(loc=m_xi, scale=std_xi) # get operators for probability density functions g_la = pdistrib_la.pdf g_xi = pdistrib_xi.pdf # number of integration points set equal for both variables n_i = 10 # generate midpoints of n_i intervals in the range (-1,1) theta_arr = np.linspace(-(1.0 - 1.0 / n_i), 1.0 - 1.0 / n_i , n_i) # scale up theta_arr to cover the random domains theta_la = m_la + 4 * std_la * theta_arr theta_xi = m_xi + 4 * std_xi * theta_arr # get the size of the integration cells d_la = (8 * std_la) / n_i d_xi = (8 * std_xi) / n_i def Heaviside(x): """Heaviside function.""" return x >= 0.0 def q_eq13(eps, la, xi): """Response function of a single fiber.""" return la * eps * Heaviside(xi - eps) def mu_q_eq13_loops(eps_arr): """Loop-based calculation of mean values.""" mu_q_arr = np.zeros_like(eps_arr) for i, eps in enumerate(eps_arr): mu_q = 0.0 for la in theta_la: for xi in theta_xi: dG = g_la(la) * g_xi(xi) * d_la * d_xi mu_q += q_eq13(eps, la, xi) * dG mu_q_arr[i] = mu_q return mu_q_arr # construct an array of control strains eps_arr = np.linspace(0, 1.2, 80) # construct an array of control strains eps_arr = np.linspace(0, 1.2, 80) start_time = sysclock() mu_q_arr = mu_q_eq13_loops(eps_arr) print 'loop-based: elapsed time', sysclock() - start_time dG_la = g_la(theta_la) * d_la dG_xi = g_xi(theta_xi) * d_xi dG_grid = dG_la[:, np.newaxis] * dG_xi[np.newaxis, :] def mu_q_eq13(eps): """Loopless calculation of mean value.""" q_grid = q_eq13(eps, theta_la[:, np.newaxis], theta_xi[np.newaxis, :]) q_dG_grid = q_grid * dG_grid return np.sum(q_dG_grid) mu_q_eq13_vct = np.vectorize(mu_q_eq13) # eps_arr from line reused here start_time = sysclock() mu_q_arr = mu_q_eq13_vct(eps_arr) print 'Regular grid of random variables: elapsed time', sysclock() - start_time p.subplot(121) p.plot(eps_arr, mu_q_arr, color='blue', label='Tgrid') p.subplot(122) expander = np.ones((n_i, n_i), dtype=int) p.plot((theta_la[np.newaxis, :] * expander).flatten(), (theta_xi[:, np.newaxis] * expander).flatten(), 'b.', label='Tgrid') def get_mu_q_fn(q, dG, *theta): """Return a method evaluating the mean of q().""" def mu_q(eps): Q_dG = q(eps, *theta) * dG return np.sum(Q_dG) return np.vectorize(mu_q) # SAMPLING: (*\label{line:TGrid_example_start}*) # ... reuse dG_grid and theta (lines (*\ref{line:theta_la}*), (*\ref{line:theta_xi}*) and (*\ref{line:g_la}*)-(*\ref{line:dG_grid}*)) # INSTANTIATION: mu_q_fn = get_mu_q_fn(q_eq13, dG_grid, theta_la[:, np.newaxis], theta_xi[np.newaxis, :]) # CALCULATION: mu_q_arr = mu_q_fn(eps_arr) # SAMPLING: # equidistant sampling probabilities (see Eq. 
(*\ref{eq:p_grid_sampling}*)) j_arr = np.arange(1, n_i + 1) pi_arr = (j_arr - 0.5) / n_i # use ppf (percent point function) to get sampling points # (pdistrib_la and pdistrib_xi was defined at lines (*\ref{line:pdistrib_la}*), (*\ref{line:pdistrib_xi}*)) theta_la = pdistrib_la.ppf(pi_arr) theta_xi = pdistrib_xi.ppf(pi_arr) # get the total number of integration points # for 2 random variaables with equal n_i n_sim = n_i ** 2 # INSTANTIATION: mu_q_fn = get_mu_q_fn(q_eq13, 1.0 / n_sim, theta_la[:, np.newaxis], theta_xi[np.newaxis, :]) start_time = sysclock() # CALCULATION: mu_q_arr = mu_q_fn(eps_arr) print 'Grid of constant probabilities: elapsed time', sysclock() - start_time p.subplot(121) p.plot(eps_arr, mu_q_arr, color='cyan', label='Pgrid') p.subplot(122) p.plot((theta_la[np.newaxis, :] * expander).flatten(), (theta_xi[:, np.newaxis] * expander).flatten(), 'co', label='Pgrid') # SAMPLING: # generate n_sim random realizations # using pdistrib objects (lines (*\ref{line:pdistrib_la}*), (*\ref{line:pdistrib_xi}*)) theta_la_rvs = pdistrib_la.rvs(n_sim) theta_xi_rvs = pdistrib_xi.rvs(n_sim) # INSTANTIATION: mu_q_fn = get_mu_q_fn(q_eq13, 1.0 / n_sim, theta_la_rvs, theta_xi_rvs) start_time = sysclock() # CALCULATION: mu_q_arr = mu_q_fn(eps_arr) print 'Monte-Carlo: elapsed time', sysclock() - start_time p.subplot(121) p.plot(eps_arr, mu_q_arr, color='red', label='Monte-Carlo') p.subplot(122) p.plot(theta_la_rvs, theta_xi_rvs, 'rD', label='Monte-Carlo') # SAMPLING: (*\label{line:LHS_example_start}*) # sampling probabilities (see Eq. (*\ref{eq:LHS_sampling}*)), n_sim as above j_arr = np.arange(1, n_sim + 1) pi_arr = (j_arr - 0.5) / n_sim # get the ppf values (percent point function) # using pdistrib objects defined at lines (*\ref{line:pdistrib_la}*), (*\ref{line:pdistrib_xi}*) theta_la_ppf = pdistrib_la.ppf(pi_arr) theta_xi_ppf = pdistrib_xi.ppf(pi_arr) # make random permutations of both arrays to diminish # correlation (not necessary for one of the random variables) theta_la = np.random.permutation(theta_la_ppf) theta_xi = theta_xi_ppf # INSTANTIATION: mu_q_fn = get_mu_q_fn(q_eq13, 1.0 / n_sim, theta_la, theta_xi) start_time = sysclock() # CALCULATION: mu_q_arr = mu_q_fn(eps_arr) print 'Grid of constant probabilities: elapsed time', sysclock() - start_time p.subplot(121) p.plot(eps_arr, mu_q_arr, color='green', label='LHS') p.subplot(122) p.plot(theta_la, theta_xi, 'go', label='LHS') p.subplot(121) p.legend() p.xlabel('$\\varepsilon$', fontsize=24) p.ylabel('$q$', fontsize=24) ############################## Discretization grids ######################## p.subplot(122) p.ylabel('$\\theta_{\\xi}$', fontsize=24) p.ylim(0.5, 1.5) p.xlim(5, 15) p.xlabel('$\\theta_{\lambda}$', fontsize=24) p.legend() p.show()
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_is_fitted, check_X_y, check_array
from sklearn.utils import check_random_state

from .utils import atleast_list, issequence
from .mathfun.special import logsumexp
from .basis_functions import LinearBasis, apply_grad
from .likelihoods import Gaussian
from .optimize import sgd, structured_sgd, logtrick_sgd, Adam
from .btypes import Bound, Positive, Parameter

# Set up logging
log = logging.getLogger(__name__)

# Module settings
WGTRND = norm()  # Sampling distribution over mixture weights
COVRND = gamma(a=2, scale=0.5)  # Sampling distribution over mixture covariance
LOGITER = 500  # Number of SGD iterations between logging ELBO and hypers


class GeneralizedLinearModel(BaseEstimator, RegressorMixin):
    r"""
    Bayesian Generalized linear model (GLM).

    This provides a scikit learn compatible interface for the glm module.

    Parameters
    ----------
    likelihood : Object
        A likelihood object, see the likelihoods module.
    basis : Basis
def normatval(mu, sigma, x):
    normdist = dist.norm()
    return normdist.pdf((x - mu) / sigma)
        s = self.discrete_rv
        x = self.continuous_rv
        for k in range(len(s.xk)):
            F = F + x.cdf(z - s.xk[k]) * s.pk[k]
        return F

    def _pdf(self, z):
        f = 0
        s = self.discrete_rv
        x = self.continuous_rv
        for k in range(len(s.xk)):
            f = f + x.pdf(z - s.xk[k]) * s.pk[k]
        return f


if __name__ == "__main__":
    # If running this code instead of importing...
    x = iid.norm()      # Create continuous rv
    Omega = (-1, 0, 1)  # Sample space for discrete rvs
    # Create two discrete rvs
    r = iid.rv_discrete(values=(Omega, (1 / 3., 1 / 2., 1 / 6.)))
    s = iid.rv_discrete(values=(Omega, (5 / 6., 1 / 12., 1 / 12.)))
    # Create new convolved rvs:
    y = ConvolvedContinuousAndDiscrete(x, s)
    t = ConvolvedDiscrete(r, s)
def __data_inverse(self, data_row, num_samples, sampling_method): """Generates a neighborhood around a prediction. For numerical features, perturb them by sampling from a Normal(0,1) and doing the inverse operation of mean-centering and scaling, according to the means and stds in the training data. For categorical features, perturb by sampling according to the training distribution, and making a binary feature that is 1 when the value is the same as the instance being explained. Args: data_row: 1d numpy array, corresponding to a row num_samples: size of the neighborhood to learn the linear model sampling_method: 'gaussian' or 'lhs' Returns: A tuple (data, inverse), where: data: dense num_samples * K matrix, where categorical features are encoded with either 0 (not equal to the corresponding value in data_row) or 1. The first row is the original instance. inverse: same as data, except the categorical features are not binary, but categorical (as the original data) """ is_sparse = sp.sparse.issparse(data_row) if is_sparse: num_cols = data_row.shape[1] data = sp.sparse.csr_matrix((num_samples, num_cols), dtype=data_row.dtype) else: num_cols = data_row.shape[0] data = np.zeros((num_samples, num_cols)) categorical_features = range(num_cols) if self.discretizer is None: instance_sample = data_row scale = self.scaler.scale_ mean = self.scaler.mean_ if is_sparse: # Perturb only the non-zero values non_zero_indexes = data_row.nonzero()[1] num_cols = len(non_zero_indexes) instance_sample = data_row[:, non_zero_indexes] scale = scale[non_zero_indexes] mean = mean[non_zero_indexes] if sampling_method == 'gaussian': data = self.random_state.normal( 0, 1, num_samples * num_cols).reshape(num_samples, num_cols) data = np.array(data) elif sampling_method == 'lhs': data = lhs(num_cols, samples=num_samples).reshape(num_samples, num_cols) means = np.zeros(num_cols) stdvs = np.array([1] * num_cols) for i in range(num_cols): data[:, i] = norm(loc=means[i], scale=stdvs[i]).ppf(data[:, i]) data = np.array(data) else: warnings.warn( '''Invalid input for sampling_method. Defaulting to Gaussian sampling.''', UserWarning) data = self.random_state.normal( 0, 1, num_samples * num_cols).reshape(num_samples, num_cols) data = np.array(data) if self.sample_around_instance: data = data * scale + instance_sample else: data = data * scale + mean if is_sparse: if num_cols == 0: data = sp.sparse.csr_matrix( (num_samples, data_row.shape[1]), dtype=data_row.dtype) else: indexes = np.tile(non_zero_indexes, num_samples) indptr = np.array( range(0, len(non_zero_indexes) * (num_samples + 1), len(non_zero_indexes))) data_1d_shape = data.shape[0] * data.shape[1] data_1d = data.reshape(data_1d_shape) data = sp.sparse.csr_matrix( (data_1d, indexes, indptr), shape=(num_samples, data_row.shape[1])) categorical_features = self.categorical_features first_row = data_row else: first_row = self.discretizer.discretize(data_row) data[0] = data_row.copy() inverse = data.copy() for column in categorical_features: values = self.feature_values[column] freqs = self.feature_frequencies[column] inverse_column = self.random_state.choice(values, size=num_samples, replace=True, p=freqs) binary_column = (inverse_column == first_row[column]).astype(int) binary_column[0] = 1 inverse_column[0] = data[0, column] data[:, column] = binary_column inverse[:, column] = inverse_column if self.discretizer is not None: inverse[1:] = self.discretizer.undiscretize(inverse[1:]) inverse[0] = data_row return data, inverse
# Hang on to the original values for comparison
nominal_values = np.array([p.value for p in p_to_fit])
x_test = np.log10(nominal_values)
print "True values (in log10 space):", x_test
print "Nominal error:", obj_func(x_test)

# Pick a starting point; in practice this would be randomly selected by
# a sampling strategy (e.g., latin hypercube sampling) or from a prior
# distribution
num_rand = 6
design = lhs(len(p_to_fit), samples=num_rand / len(p_to_fit))
means = x_test
stdvs = np.array([0.1, 0.1, 0.1])
for alp in range(len(p_to_fit)):
    design[:, alp] = norm(loc=means[alp], scale=stdvs[alp]).ppf(design[:, alp])

# Create a list of methods
meth_list = ['Nelder-Mead', 'Powell', 'COBYLA', 'TNC', 'L-BFGS-B', 'CG',
             'BFGS', 'SLSQP', 'trust-ncg', 'Newton-CG']
met_list = ['dogleg']

# Create arrays for storing no. of function evaluations and objective function values
func_eval = np.zeros((len(meth_list), num_rand / len(p_to_fit)))
obj_val = np.zeros((len(meth_list), num_rand / len(p_to_fit)))

# Run the minimization algorithm for each initial value
def AR1_logpdf(value, k, tau_e):
    return (sp.norm(loc=0, scale=1 / np.sqrt(tau_e)).logpdf(value[0]) +
            sp.norm(loc=k * value[:-1], scale=1 / np.sqrt(tau_e)).logpdf(value[1:]).sum())
            fresh[this_index] = this
            if that_index in fresh:
                del fresh[that_index]
            else:
                fresh[that_index] = that
                if this_index in fresh:
                    del fresh[this_index]
            found = True
            break
        if not found:
            fresh[this_index] = this

    result = []
    for i in fresh:
        result.append(x[i])
        # result.append(list(i.exterior.coords)[:-1])
    return result


if __name__ == '__main__':
    x = np.concatenate([norm(1000, 1.).rvs(400), norm(20000, 1.).rvs(100)])
    print x
    print mode(x)
    af = AffinityPropagation(preference=-50)
    af.fit(x[:, np.newaxis])
    print dir(af)
    print af.cluster_centers_indices_
    print len(af.cluster_centers_indices_)
def gaussian(mu=0, sd=1):
    return D.norm(mu, sd)
def design_lhs_exp(variables, maps, offsets=None, samples=int(1e4),
                   project_linear=True):
    """ Design an LHS experiment """

    design = lhs(len(variables), samples=samples, criterion="m", iterations=100)
    z_design = np.zeros_like(design)
    print "Computing LHS design..."
    if project_linear:
        print "   using linear re-projection for log variables"
    else:
        print "   using original variable coordinate"
    for i, v in enumerate(variables):
        dist, a, b = v[3]

        if project_linear:  # Re-sample in linear space
            if v[0].startswith("ln"):
                ## 9/4/2014
                ## This is an experimental correction to re-project the
                ## logarithmic variables into their normal coordinate
                ## system. It should only affect the sampling, and hopefully
                ## improve it by forcing it to even things out over the
                ## actual range we care about
                a = np.exp(a)
                b = np.exp(b)
                offsets[i] = np.exp(offsets[i])
            elif v[0].startswith("log"):
                ## 10/26/2014
                ## In accordance with above, but for log10 vars
                a = 10.0 ** a
                b = 10.0 ** b
                offsets[i] = 10.0 ** offsets[i]
        if offsets:
            ## These corrections with "offsets" re-center the interval
            ## so that the left endpoint is 0. I found that if arbitrary
            ## lower/upper limits were used, sometimes the PPF routines
            ## would really mess up in inverting the CDF.
            a, b = a - offsets[i], b - offsets[i]

        if dist == "uniform":
            design[:, i] = uniform(a, b).ppf(design[:, i])
        elif dist == "normal":
            design[:, i] = norm(a, b).ppf(design[:, i])
        elif dist == "loguniform":
            design[:, i] = loguni_ppf(design[:, i], a, b)
        else:
            raise ValueError("no dist defined for %s" % dist)

        if offsets:
            ## Project back in to the correct limits
            design[:, i] += offsets[i]
            a, b = a + offsets[i], b + offsets[i]

        if project_linear:
            if v[0].startswith("ln"):
                ## 9/4/2014 - second half of the correction
                a = np.log(a)
                b = np.log(b)
                design[:, i] = np.log(design[:, i])
            elif v[0].startswith("log"):
                ## 10/26/2014
                a = np.log10(a)
                b = np.log10(b)
                design[:, i] = np.log10(design[:, i])

        z_design[:, i] = maps[i](design[:, i], a, b)
    design = design.T  # in x-coords
    z_design = z_design.T

    return design, z_design
def nestedSimulationEX1(Ndata, K1=100, tn=50, n_repeat=1, sampling='mc'):
    '''
    Generation of nested simulation samples at the time horizon.
    Single asset example in Broadie et al (2014) by L. Wang
    Tested.

    Parameters
    ==========
    Ndata : integer
        no. of training samples
    K1 : integer
        no. of inner loops
    tn : integer
        no. of time steps in between t = \\tau and T
    n_repeat : integer
        no. of iterations for computing expectations
    sigma : float
        volatility factor in diffusion term

    Returns
    =======
    Stau : Ndata x 1 nparray
        realizations of underlying price at t = \\tau
    Loss : Ndata x n_repeat nparray
        portfolio loss of corresponding Stau
    t_prep : float
        time spent in this procedure
    '''
    import time
    #from doe_lhs import lhs
    from scipy.stats.distributions import norm

    # --- Parameters ---
    S0 = 100.; mu = 0.08; sigma = 0.2
    rfr = 0.03; T = 1. / 12.; tau = 1. / 52.
    K = np.array([101., 110., 114.5])
    H = np.array([91., 100., 104.5])
    pos = np.array([1., 1., -1.])
    n = len(K)  # number of options

    t0 = time.time()  # timer starts

    # --- portfolio price @ t = 0 ---
    V0 = np.zeros(n)
    for i in range(n):
        V0[i] = rear_end_dnOutPut(S0, K[i], rfr, T, sigma, H[i], tau, q=0.)
    Value0 = np.sum(pos * V0)

    # --- portfolio loss distribution @ t = \tau ---
    # draw samples and generate real-world scenarios
    if sampling == 'mc':
        sn = npr.standard_normal((Ndata, 1))  # be careful of the vector size
    elif sampling == 'lhs':
        sn = lhs(1, samples=Ndata)
        sn = norm(loc=0, scale=1).ppf(sn)
    Stau = np.zeros((Ndata, 1))
    Stau[:] = S0
    Stau = Stau * np.exp((mu - 0.5 * sigma ** 2) * tau + sigma * np.sqrt(tau) * sn)

    if n_repeat == 1:
        Vtau = np.zeros((Ndata, n))
        ValueTau = np.zeros(Ndata)  # be careful of the vector size
        for i in range(Ndata):
            for j in range(n):
                Vtau[i][j] = dnOutPut_nmcs(Stau[i], K[j], rfr, T - tau, sigma, H[j],
                                           M=tn, I=K1)
            ValueTau[i] = np.sum(pos * Vtau[i])
    else:
        ValueTau = np.zeros((Ndata, n_repeat))  # be careful of the vector size
        for i in range(n_repeat):
            Vtau = np.zeros((Ndata, n))
            for j in range(Ndata):
                for k in range(n):
                    Vtau[j][k] = dnOutPut_nmcs(Stau[j], K[k], rfr, T - tau, sigma, H[k],
                                               M=tn, I=K1)
                ValueTau[j][i] = np.sum(pos * Vtau[j])

    Loss = Value0 - ValueTau
    t_prep = time.time() - t0  # timer off

    return Stau, Loss, t_prep
# set the mean and the range of the sampling
c_mean, c_stdev = 1.0, 0.2
x_mean, x_stdev = 10.0, 2.0

# generate arrays of sampling values
c_arr = linspace(c_mean - (4 * c_stdev), c_mean + (4 * c_stdev), n_int)
x_arr = linspace(x_mean - (4 * x_stdev), x_mean + (4 * x_stdev), n_int)

# grid distances
dc = (c_arr[-1] - c_arr[0]) / n_int
dx = (x_arr[-1] - x_arr[0]) / n_int

# construct the normal distributions and get the methods
# for the evaluation of the probability density
pdf_c = norm(loc=c_mean, scale=c_stdev).pdf
pdf_x = norm(loc=x_mean, scale=x_stdev).pdf


def Heaviside(x):
    ''' Heaviside function '''
    #@TODO: same as definition
    return (sign(x) + 1.0) / 2.


def q(e, c, x):
    ''' Response function of a single fiber '''
    return c * e * Heaviside(x - e)


# prepare the sequence of the control strains:
# evaluate the response for an array of values of the control variable
e_arr = linspace(0, 20, 100)
# define an array of the same size as e_arr
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 16 11:04:04 2015

@author: jelle
"""
import sys
import os

path = 'C:/PostDoc/Python/IM/BASICS/pyDOE-0.3.7'
sys.path.append(path)

from pyDOE import *
from scipy.stats.distributions import norm

# basic case: a single design transformed to standard normal
lhd = lhs(2, samples=5)
lhd = norm(loc=0, scale=1).ppf(lhd)  # this applies to both factors here

# four distributions; build a series of 20 samples
design = lhs(4, samples=20)
means = [1, 2, 3, 4]
stdvs = [0.1, 0.5, 1, 0.25]
for i in xrange(4):
    design[:, i] = norm(loc=means[i], scale=stdvs[i]).ppf(design[:, i])
from scipy.stats.distributions import norm, uniform  # import normal distribution
import pylab as p  # import matplotlib with matlab interface
import numpy as np  # import numpy package
from time import clock

if __name__ == '__main__':

    n_rv = 2   # number of random variables
    n_int = 5  # number of discretization points

    # set the mean and standard deviation of the two random variables
    m_la, std_la = 10.0, 1.0
    m_xi, std_xi = 1.0, 0.1

    # construct the normal distributions and get the methods
    # for the evaluation of the probability density functions
    g_la = norm(loc=m_la, scale=std_la)
    g_xi = norm(loc=m_xi, scale=std_xi)

    # discretize the range (-1,1) symmetrically with n_int points
    theta_arr = np.linspace(-(1.0 - 1.0 / n_int), 1.0 - 1.0 / n_int, n_int)

    # cover the random variable symmetrically around the mean
    theta_la = m_la + 4 * std_la * theta_arr
    theta_xi = m_xi + 4 * std_xi * theta_arr

    # get the size of the integration cell
    d_la = (8 * std_la) / n_int
    d_xi = (8 * std_xi) / n_int

    def Heaviside(x):
        ''' Heaviside function '''
        return (np.sign(x) + 1.0) / 2.0
rnd_array_6 = np.array(rnd_array_6)

# Calculate gumbel parameters
qfd_scale = (qfd_std * (6**0.5)) / np.pi
qfd_loc = qfd_mean - (0.5722 * qfd_scale)

# Near field standard deviation
std_nft = (1.939 - (np.log(avg_nft) * 0.266)) * avg_nft

# Convert LHS probabilities to distribution invariants
comb_lhs = linear_dist(com_eff_min, com_eff_max, rnd_array_4)
qfd_lhs = gumbel_r(loc=qfd_loc, scale=qfd_scale).ppf(rnd_array_1) * comb_lhs
glaz_lhs = linear_dist(glaz_min, glaz_max, rnd_array_2)
beam_lhs = linear_dist(beam_min, beam_max, rnd_array_3) * depth
spread_lhs = linear_dist(spread_min, spread_max, rnd_array_5)
nft_lhs = norm(loc=avg_nft, scale=std_nft).ppf(rnd_array_6)

# initialise output arrays
peak_st_fract = []
peak_st_temp = []

for i in range(0, lhs_iterations):
    dict_inputs = {
        "window_height": win_height,
        "window_width": win_width,
        "window_open_fraction": glaz_lhs[i],
        "room_breadth": breadth,
        "room_depth": depth,
        "room_height": height,
        "fire_load_density": qfd_lhs[i],
        "fire_hrr_density": hrr_pua,
def distribution(self, t: float) -> rv_frozen:
    return norm(self.mu * t, self.sigma * np.sqrt(t))
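# --- usage sketch (not from the source): shows how a `distribution` method
# like the one above might be used. `BrownianMotionWithDrift` is an assumed,
# illustrative class name; the return-type annotation is omitted to keep the
# sketch self-contained.
import numpy as np
from scipy.stats.distributions import norm


class BrownianMotionWithDrift:
    """Illustrative container: X(t) ~ Normal(mu * t, sigma * sqrt(t))."""

    def __init__(self, mu, sigma):
        self.mu = mu
        self.sigma = sigma

    def distribution(self, t):
        # frozen normal describing the process marginal at time t
        return norm(self.mu * t, self.sigma * np.sqrt(t))


bm = BrownianMotionWithDrift(mu=0.1, sigma=0.3)
print(bm.distribution(2.0).ppf([0.05, 0.5, 0.95]))  # quantiles at t = 2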
from numpy import *
import pymc
from scipy import stats
from scipy.stats import distributions as d

# parameters about the data
dimensions = 100
observations = 100
shape = (dimensions, observations)
data = d.norm(loc=0, scale=1).rvs((dimensions, observations))


def model_gen():
    variables = []
    means = pymc.Normal("means", mu=zeros(dimensions), tau=ones(dimensions))
    sds = pymc.Gamma("sds", alpha=ones(dimensions) * 1, beta=ones(dimensions) * 1)
    variables.append(means)
    variables.append(sds)

    @pymc.deterministic
    def precisions(stdev=sds):
        precisions = (ones(shape) * (stdev**-2)[:, newaxis]).ravel()
        return precisions

    @pymc.deterministic
    def obsMeans(means=means):
import platform
import time

if platform.system() == 'Linux':
    sysclock = time.time
elif platform.system() == 'Windows':
    sysclock = time.clock

if __name__ == '__main__':

    n_rv = 2

    # set the mean and standard deviation of the two random variables
    m_la, std_la = 10.0, 1.0
    m_xi, std_xi = 1.0, 0.1

    # construct objects representing normal distributions
    pdistrib_la = norm(loc=m_la, scale=std_la)
    pdistrib_xi = norm(loc=m_xi, scale=std_xi)

    # for the evaluation of the probability density functions
    g_la = pdistrib_la.pdf
    g_xi = pdistrib_xi.pdf

    n_int = 10  # number of discretization points

    # discretize the range (-1,1) symmetrically with n_int points
    theta_arr = np.linspace(-(1.0 - 1.0 / n_int), 1.0 - 1.0 / n_int, n_int)

    # cover the random variable symmetrically around the mean
    theta_la = m_la + 4 * std_la * theta_arr
    theta_xi = m_xi + 4 * std_xi * theta_arr
from pyDOE import *
from scipy.stats.distributions import norm

# Latin Hypercube Sampling
# see: https://pythonhosted.org/pyDOE/randomized.html

# Run LHS for n factors
X = lhs(4, samples=100)  # lhs(n, [samples, criterion, iterations])

# Transform factors to normal distributions with means and standard deviations
means = [1, 2, 3, 4]
stdvs = [0.1, 0.5, 1, 0.25]
for i in range(4):
    X[:, i] = norm(loc=means[i], scale=stdvs[i]).ppf(X[:, i])
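# --- quick check of the transform above (a sketch, not from the source):
# each column of X should now roughly match the requested mean and standard
# deviation, up to LHS sampling noise.
import numpy as np

for i in range(4):
    print("factor %d: mean=%.3f, std=%.3f" % (i, X[:, i].mean(), X[:, i].std()))
# expected to be close to means=[1, 2, 3, 4] and stdvs=[0.1, 0.5, 1, 0.25]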
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
import numpy as np
from scipy.stats.distributions import norm

x = np.concatenate([norm(-1, 1.).rvs(400), norm(1, 0.3).rvs(100)])
y = np.concatenate([norm(0, 1.).rvs(400), norm(1, 0.3).rvs(100)])

# np.linspace returns evenly spaced numbers over a specified interval
grid = GridSearchCV(KernelDensity(),
                    {'bandwidth': np.linspace(-4.5, 3.5, 100)},
                    cv=20)  # 20-fold cross-validation
grid.fit(x[:, None])
print grid.best_params_
import pymc3 as pm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import distributions as dist
import theano
import arviz as az
import warnings

warnings.filterwarnings('ignore')

x = np.array([1, 5, 8])
y = 1 + x + dist.norm(0, 1.5).rvs(3)
y = y.reshape(-1, 1)
x = x.reshape(-1, 1)
xs = x
ys = y
print(x.shape)

initial_w = dist.uniform(0, 1).rvs(1)

with pm.Model() as NN:
    x = pm.Data('x', x)
    y = pm.Data('y', y)
    w = pm.Normal('w', mu=0, sigma=10, shape=[1, 1], testval=initial_w)
    l1 = pm.Deterministic('l1', pm.math.dot(x, w))
    y_l = pm.Normal('y_l', l1, observed=y)
    trace = pm.sample(cores=1)
    pp = pm.sample_posterior_predictive(trace, random_seed=123)

plt.plot(xs, pp['y_l'].mean(axis=0).reshape(1, -1).flatten())
def get_ls_factor(n_uncertain, n_samples, pc_order, lamb=0.0):
    # Uncertain parameter design
    sobol_design = sobol_seq.i4_sobol_generate(n_uncertain, n_samples,
                                               ceil(np.log2(n_samples)))
    sobol_samples = np.transpose(sobol_design)
    for i in range(n_uncertain):
        sobol_samples[i, :] = norm(loc=0., scale=1).ppf(sobol_samples[i, :])

    # Polynomial function definition (probabilists' Hermite polynomials)
    x = SX.sym('x')
    he0fcn = Function('He0fcn', [x], [1.])
    he1fcn = Function('He1fcn', [x], [x])
    he2fcn = Function('He2fcn', [x], [x**2 - 1])
    he3fcn = Function('He3fcn', [x], [x**3 - 3 * x])
    he4fcn = Function('He4fcn', [x], [x**4 - 6 * x**2 + 3])
    he5fcn = Function('He5fcn', [x], [x**5 - 10 * x**3 + 15 * x])
    he6fcn = Function('He6fcn', [x], [x**6 - 15 * x**4 + 45 * x**2 - 15])
    he7fcn = Function('He7fcn', [x], [x**7 - 21 * x**5 + 105 * x**3 - 105 * x])
    he8fcn = Function('He8fcn', [x], [x**8 - 28 * x**6 + 210 * x**4 - 420 * x**2 + 105])
    he9fcn = Function('He9fcn', [x], [x**9 - 36 * x**7 + 378 * x**5 - 1260 * x**3 + 945 * x])
    he10fcn = Function('He10fcn', [x],
                       [x**10 - 45 * x**8 + 630 * x**6 - 3150 * x**4 + 4725 * x**2 - 945])
    helist = [he0fcn, he1fcn, he2fcn, he3fcn, he4fcn, he5fcn, he6fcn, he7fcn,
              he8fcn, he9fcn, he10fcn]

    # Calculation of factor for least-squares
    xu = SX.sym("xu", n_uncertain)
    exps = (p for p in product(range(pc_order + 1), repeat=n_uncertain)
            if sum(p) <= pc_order)
    next(exps)
    exps = list(exps)

    psi = SX.ones(int(factorial(n_uncertain + pc_order)
                      / (factorial(n_uncertain) * factorial(pc_order))))
    for i in range(len(exps)):
        for j in range(n_uncertain):
            psi[i + 1] *= helist[exps[i][j]](xu[j])
    psi_fcn = Function('PSIfcn', [xu], [psi])

    nparameter = SX.size(psi)[0]
    psi_matrix = SX.zeros(n_samples, nparameter)
    for i in range(n_samples):
        psi_a = psi_fcn(sobol_samples[:, i])
        for j in range(SX.size(psi)[0]):
            psi_matrix[i, j] = psi_a[j]

    psi_t_psi = mtimes(psi_matrix.T, psi_matrix) + lamb * DM.eye(nparameter)
    chol_psi_t_psi = chol(psi_t_psi)
    inv_chol_psi_t_psi = solve(chol_psi_t_psi, SX.eye(nparameter))
    inv_psi_t_psi = mtimes(inv_chol_psi_t_psi, inv_chol_psi_t_psi.T)

    ls_factor = mtimes(inv_psi_t_psi, psi_matrix.T)
    ls_factor = DM(ls_factor)

    # Calculation of expectations for variance function
    n_sample_expectation_vector = 100000
    x_sample = np.random.multivariate_normal(np.zeros(n_uncertain),
                                             np.eye(n_uncertain),
                                             n_sample_expectation_vector)
    psi_squared_sum = DM.zeros(SX.size(psi)[0])
    for i in range(n_sample_expectation_vector):
        psi_squared_sum += psi_fcn(x_sample[i, :])**2
    expectation_vector = psi_squared_sum / n_sample_expectation_vector

    return ls_factor, expectation_vector, psi_fcn