Example #1
def example():
    xy_grid = make_xy_grid(-4.5, 3.5, 100)
    x = np.concatenate([norm(-1, 1.).rvs(400),
                        norm(1, 0.3).rvs(100)])
    x = x.reshape([250, 2])
    kde, bandwidth = kde_fit_cv(x)
    pdf = kde_eval(kde, xy_grid)
    plot(xv, yv, pdf)
Example #2
    def test_cdf_normal_case(self):
        (g, m, s) = (0., 0., 1.)
        assert_almost_equal(pearson3(g).cdf([0., .25, .5, 1.]),
                            dist.norm().cdf([0., .25, .5, 1.]))
        assert_almost_equal(pearson3(g).pdf([0., .25, .5, 1.]),
                            dist.norm().pdf([0., .25, .5, 1.]))
        assert_almost_equal(pearson3(g).ppf([0.001, 0.25, 0.50, 0.999]),
                            dist.norm().ppf([0.001, 0.25, 0.50, 0.999]))
        assert_almost_equal(pearson3(g).sf([0., .25, .5, 1.]),
                            dist.norm().sf([0., .25, .5, 1.]))
        assert_almost_equal(pearson3(g).isf([0.001, 0.25, 0.50, 0.999]),
                            dist.norm().isf([0.001, 0.25, 0.50, 0.999]))
Example #3
def testImportanceMixing(popsize = 5000, forcedRefresh = 0.0):
    import pylab
    distr1 = norm()
    distr2 = norm(loc = 1.5)
    p1 = distr1.rvs(popsize)
    inds, np = importanceMixing(p1, distr1.pdf, distr2.pdf, lambda: distr2.rvs()[0], forcedRefresh)
    reuse = [p1[i] for i in inds]
    p2 = reuse + np
    p2b = distr2.rvs(popsize)
    pylab.hist(array([p2, p2b]).T,
               20, normed=1, histtype='bar')
    pylab.show()
Example #4
File: gaussian.py Project: fmassot/stats
def test_max_likelihood():
    import pdb
    pdb.set_trace()
    x = [1., 2.]
    obs = gaussian_generator(mu=x[0], sigma=x[1])
    pdf = norm(x[0], x[1]).pdf

    func_to_minimize = lambda x: -get_log_likelihood( norm(x[0], x[1]).pdf, obs )

    solver = Solver(func_to_minimize, minimize)

    x0 = [0., 0.5]
    res = solver.solve(x0)
    print res
Example #5
File: theta.py Project: CoAxLab/radd
def init_distributions(pkey, kind='dpm', nrvs=25, tb=.65, force_normal=False):
    """ sample random parameter sets to explore global minima (called by
    Optimizer method __hop_around__())
    """
    loc, scale = get_theta_params(pkey, kind=kind)
    bounds = get_bounds(kind=kind)[pkey]
    lower = np.min(bounds)
    upper = np.max(bounds)
    normal_params = ['a', 'tr', 'v', 'vd', 'ssv', 'sso', 'xb', 'z', 'Beta']
    uniform_params = ['vi', 'BX', 'AX', 'PX', 'si']

    # init and freeze dist shape
    if pkey in normal_params:
        dist = norm(loc, scale)
    # elif pkey in gamma_params:
    #     dist = gamma(1.0, loc, scale)
    elif pkey in uniform_params:
        dist = uniform(loc, scale)
    # generate random variates
    rvinits = dist.rvs(nrvs)
    while rvinits.min() < lower:
        # apply lower limit
        ix = rvinits.argmin()
        rvinits[ix] = dist.rvs()
    while rvinits.max() > upper:
        # apply upper limit
        ix = rvinits.argmax()
        rvinits[ix] = dist.rvs()
    if pkey =='tr':
        rvinits = np.abs(rvinits)
    rvinits[rvinits<lower] = lower
    rvinits[rvinits>upper] = upper
    return rvinits
Example #6
	def analy(self,N_o):
		''' Analytical solution
		'''
		# --- portfolio loss distribution @ t = \tau via analytical formulas ---
		opt_val = lambda S: - 5.*bp.blsprice(S,self.K,self.rfr,self.T-self.tau,self.sigma,'put') \
							- 10.*bp.dnOutCall(S,self.K,self.rfr,self.T-self.tau,self.sigma,self.H)

		#ran1 = npr.multivariate_normal(np.zeros(self.D),np.eye(self.D),N_o)
		#ran1 = npr.standard_normal((N_o,self.D))
		ran1 = norm(loc=0,scale=1).ppf(lhs(self.D,samples=N_o))
		S1 = np.zeros((N_o,self.D))
		S1[:,:] = self.S0
		S1[:,:] = S1[:,:] * np.exp((self.mu - 0.5 * self.sigma**2) * self.tau\
				 + self.sigma * np.sqrt(self.tau) * ran1[:,:])
		
		t0 = time.time()
		ValueTau = np.zeros(N_o)
		for n in range(N_o):
			ValueTau[n] = np.sum(map(opt_val,S1[n,:]))
		print "%.2fs elapsed" % (time.time() - t0)

		L_analy = np.sort(self.Value0 - ValueTau)
		#L_analy = np.sort(-ValueTau + self.Value0 * np.exp(self.rfr*self.tau))
		var = scs.scoreatpercentile(L_analy, self.perc*100.)
		eel = np.mean(np.maximum(L_analy-var,0))
		return (var, eel)
Example #7
    def fit(self, X, y):
        X = numpy.array(X)
        self.initial_values = []
        self.transformed_values = []
        for axis in range(X.shape[1]):
            initial_values = X[:, axis] * (1 + 1e-6 * numpy.random.normal(size=len(X)))
            initial_values += 1e-8 * numpy.random.normal(size=len(X))
            indices = numpy.argsort(initial_values)
            initial_values = initial_values[indices]
            self.initial_values.append(initial_values)
            transformed = numpy.arange(len(X), dtype='float')
            # increase the distance between neighbours of different classes
            additions = numpy.abs(numpy.diff(y[indices]))
            additions = numpy.cumsum(additions)
            transformed[1:] += additions * self.scale
            transformed /= transformed[-1] / 2.
            transformed -= 1

            if self.like_normal:
                # converting to normal-like distributions
                transformed -= transformed[0]
                transformed /= transformed[-1] / 0.9
                transformed += 0.05
                transformed = norm().ppf(transformed)

            self.transformed_values.append(transformed)

        return self
Example #8
    def add_set_value_random_norm(self, variable, means, stdvs ) :
        """ Add a 'Set Value' macro command where the value is chosen from a
        random normal distribution.

        Parameters
        ----------
        variable: string
            An AnyScript variable or a list of AnyScript variables.
        means: int, float, numpy.ndarray
            The mean value of the random number
        stdvs: int, float, numpy.ndarray
            The standard deviation of the random variable


        Examples
        --------
            Set variable across different macros

        >>> seed(1)
        >>> mg = MonteCarloMacroGenerator(number_of_macros=5)
        >>> mg.add_set_value_random_norm('Main.Var', means = [1,2,4], stdvs = [0.1,0.5,2])
        >>> for line in mg.generate_macros(): pprint(line)
        ['classoperation Main.Var "Set Value" --value="{1,2,4}"']
        ['classoperation Main.Var "Set Value" --value="{0.979048215908,2.29190287213,-3.36989533908}"']
        ['classoperation Main.Var "Set Value" --value="{0.948229648476,1.47477555917,1.34701845466}"']
        ['classoperation Main.Var "Set Value" --value="{0.910823783045,1.80133318708,3.47655384811}"']
        ['classoperation Main.Var "Set Value" --value="{1.00974531575,1.8980227331,4.96468967866}"']

        """
        dist = distributions.norm(means,stdvs)
        self.add_set_value_random(variable,dist)
Example #9
File: correlate.py Project: tisimst/mcerp
def induce_correlations(data, corrmat):
    """
    Induce a set of correlations on a column-wise dataset
    
    Parameters
    ----------
    data : 2d-array
        An m-by-n array where m is the number of samples and n is the
        number of independent variables, each column of the array corresponding
        to each variable
    corrmat : 2d-array
        An n-by-n array that defines the desired correlation coefficients
        (between -1 and 1). Note: the matrix must be symmetric and
        positive-definite in order to induce.
    
    Returns
    -------
    new_data : 2d-array
        An m-by-n array that has the desired correlations.
        
    """
    # Create a rank matrix
    data_rank = np.vstack([rankdata(datai) for datai in data.T]).T

    # Generate van der Waerden scores
    data_rank_score = data_rank / (data_rank.shape[0] + 1.0)
    data_rank_score = norm(0, 1).ppf(data_rank_score)

    # Calculate the lower triangular matrix of the Cholesky decomposition
    # of the desired correlation matrix
    p = chol(corrmat)

    # Calculate the current correlations
    t = np.corrcoef(data_rank_score, rowvar=0)

    # Calculate the lower triangular matrix of the Cholesky decomposition
    # of the current correlation matrix
    q = chol(t)

    # Calculate the re-correlation matrix
    s = np.dot(p, np.linalg.inv(q))

    # Calculate the re-sampled matrix
    new_data = np.dot(data_rank_score, s.T)

    # Create the new rank matrix
    new_data_rank = np.vstack([rankdata(datai) for datai in new_data.T]).T

    # Sort the original data according to new_data_rank
    for i in range(data.shape[1]):
        vals, order = np.unique(
            np.hstack((data_rank[:, i], new_data_rank[:, i])), return_inverse=True
        )
        old_order = order[: new_data_rank.shape[0]]
        new_order = order[-new_data_rank.shape[0] :]
        tmp = data[np.argsort(old_order), i][new_order]
        data[:, i] = tmp[:]

    return data
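A minimal usage sketch for induce_correlations, not taken from the original project; `chol` is assumed to be a lower-triangular Cholesky factorization (here np.linalg.cholesky, matching the comments above), and `rankdata` comes from scipy.stats.

import numpy as np
from scipy.stats import rankdata
from scipy.stats.distributions import norm

chol = np.linalg.cholesky  # assumed stand-in for the chol helper used above

data = np.random.rand(1000, 2)              # two independent uniform columns
corrmat = np.array([[1.0, 0.8],
                    [0.8, 1.0]])            # desired correlation matrix
new_data = induce_correlations(data, corrmat)
print(np.corrcoef(new_data, rowvar=False))  # off-diagonal entries should be near 0.8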
Example #10
def test_box_normal_pdf():
    """
    Testing normal distribution for box approach
    """
    vx = np.linspace(-vmax, vmax, 1000)
    f = box_to_pdf(data, vx)
    ftrue = norm(0, sigma).pdf(vx)
    error = relative_L2_error(f, ftrue, vx)
    assert_almost_equal(error, 0, decimal=2)
Example #11
def test_normal_pdf():
    """
    Testing normal distribution for point approach
    """
    x = np.linspace(-vmax, vmax, 1000)
    fapprox = data_to_pdf(data[:, 3], x)
    ftrue = norm(0, sigma).pdf(x)
    error = relative_L2_error(fapprox, ftrue, x)
    assert_almost_equal(error, 0, decimal=2)
Example #12
    def nbTL(self, node):
        probs = [1,1]
##        print(node.tag)
##        print(node.attrib['position'])
##        print(node.attrib['length'])
##        print(node.attrib['size'])
##        print(node.attrib['left'])
##        print(node.attrib['width'])
##        print(node.attrib['height'])
##        print(node.attrib['top'])
        for label in range(2):        
            if node.tag in self.mtagid[1]:
                probs[label] *= self.mtagid[label][node.tag]
            else:
                probs[label] *= 0

            if node.attrib['position'] == '0':
                probs[label] *= self.mpos[label][0]
            else:
                probs[label] *= self.mpos[label][1]

            normer = distributions.norm(self.mlen[label][0], self.mlen[label][1])
            probs[label] *= normer.pdf(int(node.attrib['length']))

            normer = distributions.norm(self.msize[label][0], self.msize[label][1])
            probs[label] *= normer.pdf(int(node.attrib['size']))**2
            
            normer = distributions.norm(self.mleft[label][0], self.mleft[label][1])
            probs[label] *= normer.pdf(int(node.attrib['left']))

            normer = distributions.norm(self.mwidth[label][0], self.mwidth[label][1])
            probs[label] *= normer.pdf(int(node.attrib['width']))

            #normer = distributions.norm(self.mheight[label][0], self.mheight[label][1])
            #probs[label] *= (normer.pdf(int(node.attrib['height'])))**0.5
        if probs[1] > probs[0]:
            #print(probs)
            return True
        else:
            return False
Example #13
def generate_init(model, jj, ns, output_file, lognorm=False, best_reg=True):
    # select the parameters
    if best_reg:
        p_to_fit = [p for p in model.parameters if p.name[0] == 'k']
        num_ind = len(p_to_fit) * ns
        ini_val = lhs(len(p_to_fit), samples=ns)
        fname_new = 'stat-%d.pkl' % (jj)
        stat = pickle.load(open(fname_new, "rb"))
        means = stat[0]
        stdvs = stat[1]
        if lognorm:
            for ind in range(len(p_to_fit)):
                ini_val[:, ind] = norm(loc=means[ind], scale=stdvs[ind]).ppf(ini_val[:, ind])
        else:
            # First shift unit hypercube to be centered around origin
            # Then scale hypercube along each dimension by 2 stdevs
            # Finally shift hypercube to be centered around nominal values
            ini_val = means + 2 * stdvs * (ini_val - 0.5)
    else:
        p_to_fit = [p for p in model.parameters if p.name[0] == 'k']
        nominal_values = np.array([q.value for q in p_to_fit])
        log_nominal_values = np.log10(nominal_values)

        # latin hypercube sampling for picking a starting point
        num_ind = len(p_to_fit) * ns
        ini_val = lhs(len(p_to_fit), samples=ns)
        means = log_nominal_values
        stdvs = np.ones(len(p_to_fit))
        if lognorm:
            for ind in range(len(p_to_fit)):
                ini_val[:, ind] = norm(loc=means[ind], scale=stdvs[ind]).ppf(ini_val[:, ind])
        else:
            # First shift unit hypercube to be centered around origin
            # Then scale hypercube along each dimension by 2 stdevs
            # Finally shift hypercube to be centered around nominal values
            ini_val = means + 2 * stdvs * (ini_val - 0.5)

    np.save(output_file, ini_val)
Example #14
    def test_lstats_gaussian(self):
        "Try lstats on Gaussian"
        _nor = [0., 0.56418958, 0., 0.12260172, 0.,
                0.04366115, 0., 0.02184314, 0., 0.0129635,
                0., 0.00852962, 0., 0.00601389, 0.,
                0.00445558, 0., 0.00342643, 0., 0.00271268]
        assert_almost_equal(dist.norm.lstats(20), _nor)
        assert_almost_equal(dist.norm.lstats(20, 0., 1.), _nor)
        _nor = [1., 1.69256875, 0., 0.12260172, 0.,
                0.04366115, 0., 0.02184314, 0., 0.0129635,
                0., 0.00852962, 0., 0.00601389, 0.,
                0.00445558, 0., 0.00342643, 0., 0.00271268]
        assert_almost_equal(dist.norm(1., 3.).lstats(20), _nor)
        assert_almost_equal(dist.norm.lstats(20, 1., 3.), _nor)
Example #15
def test_setvalue_random():
    np.random.seed(1)
    fdist = norm(2, [1, 1, 1])

    c = mc.SetValue_random("val", fdist)
    assert c.get_macro(0) == 'classoperation val "Set Value" --value="{2,2,2}"'

    mg = AnyMacro(c)
    macrolist = mg.create_macros_MonteCarlo(2)
    assert macrolist[0][0] == 'classoperation val "Set Value" --value="{2,2,2}"'
    assert (
        macrolist[1][0]
        == 'classoperation val "Set Value" --value="{1.79048215908,2.58380574427,-1.68494766954}"'
    )
Example #16
    def test_set_value_LHS(self):
        seed(1)
        normdist = norm( [1,3,4], [0.1,0.5,1] )
        mg = LatinHyperCubeMacroGenerator(number_of_macros=4)
        mg.add_set_value_LHS('Main.myvar1',normdist)
        mg.add_set_value_LHS('Main.myvar2', normdist)
        macros =  mg.generate_macros() 
        
        assert len(macros) == 4

        assert macros[0][0] == 'classoperation Main.myvar1 "Set Value" --value="{1,3,4}"'
        assert macros[0][1] == 'classoperation Main.myvar2 "Set Value" --value="{1,3,4}"'
        assert macros[1][0] == 'classoperation Main.myvar1 "Set Value" --value="{0.973478019766,2.64702266602,0.0441088571967}"'
        assert macros[1][1] == 'classoperation Main.myvar2 "Set Value" --value="{1.12273573036,2.17228489089,4.15538179788}"'
Example #17
def get_noise_value_at(x, variance, noise_gen, val_min=0, val_max=1):
    """
    Returns the noise value for noise_gen for a given variance at x.

    The noise_gen is assumed to represent a [0, 1] hypercube and is smoothed
    by a gaussian distribution with variance variance.

    Note that the smoothing is hard-capped at a 3 sigma interval due to
    performance reasons.

    Parameters
    ----------
    x : list of real values
        The values of x. The ith entry represents the value of x in the ith
        dimension.
    variance : float
        The variance of the normal distribution to smooth the noise.
    noise_gen : ndarray
        The array representing the generated noise.
    val_min, val_max : float
        This is used to scale the actual maximum and minimum values to represent
        the same as otherwise values would not be comparable between variances.

    Returns
    -------
    x_value : float
        The value of the function at the point x.
    """
    x_value = 0
    prob_sum = 0
    gaussian = norm(scale=variance)
    dims = len(noise_gen.shape)
    points = len(noise_gen[0])

    closest_idx = _gen_closest_index(x, points)
    close_indices = _gen_close_indices(closest_idx, max(1, int(variance*3*points)),
                                      dims, points)
    for i in close_indices:
        dist = _calc_distance_grid(x, i, points)
        prob = gaussian.pdf(dist)
        prob_sum += prob
        x_value += prob * noise_gen[i]
    x_value /= prob_sum

    x_value = (x_value - val_min)/(val_max- val_min)

    return x_value
Example #18
def MixedEstimator(abg_init, binnedTrain, dp_tol = 1e-2):
    phis = binnedTrain.bins.keys();
    theta = binnedTrain.theta

    dp = dp_tol*2.0;
    abg = abg_init
    
    while dp > dp_tol:
        Tf = binnedTrain.getTf()
   
        xmin = FPMultiPhiSolver.calculate_xmin(Tf, abg)
        dx = FPMultiPhiSolver.calculate_dx(abg, xmin)
        dt = FPMultiPhiSolver.calculate_dt(dx, abg, xmin, factor = 8.)

        S = FPMultiPhiSolver(theta, phis,
                             dx, dt, Tf, xmin)

        Fs = S.solve(abg, visualize=False)
        Ss = S.transformSurvivorData(binnedTrain)
        Ls = Fs[:,:,-1] - Ss
        Nus = S.solveAdjoint(abg, Ls)
    
        dGdp = S.estimateParameterGradient(abg, Fs, Nus)

        from numpy.linalg.linalg import norm
        
        dG_normalized = dGdp/ norm(dGdp) 
        
        dp = FortetLineEstimator(binnedTrain, abg, dG_normalized, dp_tol)
        
        abg = abg - dp*dG_normalized

        print 'dG = ', dG_normalized
        print 'dp = ', dp
        print 'abg = (%.3g, %.3g, %.3g)'%(abg[0],abg[1],abg[2])
        print '-'

    return abg
Example #19
File: theta.py Project: dunovank/radd_kd
def init_distributions(pkey, kind='dpm', mu = None, sigma = None, nrvs=25, tb=.65):
    """ sample random parameter sets to explore global minima (called by
    Optimizer method __hop_around__())
    """
    if mu is None:
        mu = {'a': .15, 'tr': .02, 'v': 1., 'ssv': -1., 'z': .1, 'xb': 1., 'sso': .15, 'vi': .35, 'vd': .5}
    if sigma is None:
        sigma = {'a': .35, 'tr': .25, 'v': .5, 'ssv': .5, 'z': .05, 'xb': .5, 'sso': .01, 'vi': .4, 'vd': .5}
    normal_params = ['tr', 'v', 'vd', 'ssv', 'z', 'xb', 'sso']
    gamma_params = ['a', 'tr']
    uniform_params = ['vd', 'vi']
    if 'race' in kind:
        sigma['ssv'] = abs(mu['ssv'])
    bounds = get_bounds(kind=kind)[pkey]
    loc = mu[pkey]
    scale = sigma[pkey]
    # init and freeze dist shape
    if pkey in normal_params:
        dist = norm(loc, scale)
    elif pkey in gamma_params:
        dist = gamma(1.0, loc, scale)
    elif pkey in uniform_params:
        dist = uniform(loc, scale)
    # generate random variates
    rvinits = dist.rvs(nrvs)
    while rvinits.min() < bounds[0]:
        # apply lower limit
        ix = rvinits.argmin()
        rvinits[ix] = dist.rvs()
    while rvinits.max() > bounds[1]:
        # apply upper limit
        ix = rvinits.argmax()
        rvinits[ix] = dist.rvs()
    if pkey =='tr':
        rvinits = np.abs(rvinits)
    return rvinits
Example #20
def sample_pspace(model, param_list=None, bounds=None, samples=100, seed=None):
    """
    A DataFrame where each row represents a location in the parameter
    space, locations distributed to exercise the full range of values
    that each parameter can take on.

    This is useful for quick and dirty application of tests to a bunch
    of locations in the sample space. Kind-of a fuzz-testing for
    the model.

    Uses latin hypercube sampling, with random values within
    the sample bins. The LHS sampler shuffles the bins each time,
    so a subsequent call will yield a different sample from the
    parameter space.

    When a variable has both upper and lower bounds, use a uniform
    sample between those bounds.

    When a variable has only one bound, use an exponential distribution
    with the scale set to be the difference between the bound and the
    current model value (1 if they are the same)

    When the variable has neither bound, use a normal distribution centered
    on the current model value, with scale equal to the absolute value
    of the model value (1 if that magnitude is 0)

    Parameters
    ----------
    model: pysd.Model object

    param_list: None or list of strings
        The real names of parameters to include in the explored parameter
        space.
        If None, uses all of the constants in the model except TIME STEP,
        INITIAL TIME, etc.

    bounds: DataFrame, string filename, or None
        A range test matrix as used for bounds checking.
        If None, creates one from the model
        These bounds can also place artificial limits on the
        parameter space you want to explore, even if the theoretical
        bounds on the variable are infinite.

    samples: int
        The number of samples to include in the iterator.
    seed: int, optional
        Seed for numpy's random number generator, so the sample can be reproduced.

    Returns
    -------
    lhs : pandas DataFrame
        distribution-weighted latin hypercube samples

    Note
    ----
    Executes the model by 1 time-step to get the current value of parameters.

    """
    if param_list is None:
        doc = model.doc()
        param_list = sorted(
            list(
                set(doc[doc['Type'] == 'constant']['Real Name']) -
                {'FINAL TIME', 'INITIAL TIME', 'TIME STEP', 'TIME STEP'}))

    if isinstance(bounds, _pd.DataFrame):
        bounds = bounds.set_index('Real Name')
    elif bounds is None:
        bounds = create_bounds_test_matrix(model).set_index('Real Name')
    elif isinstance(bounds, str):
        if bounds.split('.')[-1] in ['xls', 'xlsx']:
            bounds = _pd.read_excel(bounds,
                                    sheetname='Bounds',
                                    index_col='Real Name')
        elif bounds.split('.')[-1] == 'csv':
            bounds = _pd.read_csv(bounds, index_col='Real Name')
        elif bounds.split('.')[-1] == 'tab':
            bounds = _pd.read_csv(bounds, sep='\t', index_col='Real Name')
        else:
            raise ValueError('Unknown file type: bounds')
    else:
        raise ValueError('Unknown type: bounds')

    if seed is not None:
        _np.random.seed(seed)

    unit_lhs = _pd.DataFrame(_pyDOE.lhs(n=len(param_list), samples=samples),
                             columns=param_list)  # raw latin hypercube sample

    res = model.run(return_timestamps=[model.components.initial_time()])
    lhs = _pd.DataFrame(index=unit_lhs.index)
    for param in param_list:
        lower, upper = bounds[['Min', 'Max']].loc[param]
        value = res[param].iloc[0]

        if lower == upper:
            lhs[param] = lower

        elif _np.isfinite(lower) and _np.isfinite(
                upper):  # np.isfinite(0)==True
            scale = upper - lower
            lhs[param] = _dist.uniform(lower, scale).ppf(unit_lhs[param])

        elif _np.isfinite(lower) and _np.isinf(upper):
            if lower == value:
                scale = 1
            else:
                scale = value - lower
            lhs[param] = _dist.expon(lower, scale).ppf(unit_lhs[param])

        elif _np.isinf(lower) and _np.isfinite(
                upper):  # np.isinf(-np.inf)==True
            if upper == value:
                scale = 1
            else:
                scale = upper - value
            lhs[param] = upper - _dist.expon(0, scale).ppf(unit_lhs[param])

        elif _np.isinf(lower) and _np.isinf(upper):  # np.isinf(-np.inf)==True
            if value == 0:
                scale = 1
            else:
                scale = abs(value)
            lhs[param] = _dist.norm(value, scale).ppf(unit_lhs[param])

        else:
            raise ValueError('Problem with lower: %s or upper: %s bounds' %
                             (lower, upper))

    return lhs
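The four bound-dependent branches above can be illustrated in isolation. A small sketch with made-up bound values (2 and 10) and a current value of 1, using only numpy and scipy:

import numpy as np
from scipy.stats import distributions as _dist

u = np.linspace(0.05, 0.95, 5)              # stands in for one unit-hypercube column
print(_dist.uniform(0.0, 10.0).ppf(u))      # finite lower and upper bounds: Uniform(0, 10)
print(_dist.expon(2.0, 3.0).ppf(u))         # only a lower bound of 2, scale 3
print(10.0 - _dist.expon(0, 3.0).ppf(u))    # only an upper bound of 10, scale 3
print(_dist.norm(1.0, abs(1.0)).ppf(u))     # no bounds: Normal around the current value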
Example #21
def mult(A, B, epsilon=0.25, delta=0.25, decompose=False):
    '''
  Matrix multiplication using Algorithm 1 (Page 5)
  without the use of the Fast Johnson-Lindenstrauss Transform (FJLT).

  Let C be the output of this function. Then:
     Pr(||AB-C||_F <= 2*epsilon*||A||_F * ||B||_F) >= 1 - delta.

  Time complexity of:
    O(b(1/e^2((n+g)(m+p)) + g(n(m+p)+lg(g)))), where:
    * e = epsilon
    * b = beta = O(lg(1/delta))
    * g = gamma = O(b + lg(b)) = O(lg(1/delta) + lg(lg(1/delta))).
  with
    * epsilon > 0
    * 0 < delta <= 1/2, since we require lglg(1/delta) >= 0 (lg = log base 2).

  decompose - if True, returns two matrices X, Y such that XY = AB, though
              X and Y don't take up as much space as A and B.
  '''
    assert epsilon > 0, f'epsilon must be greater than 0. Found epsilon={epsilon}'
    assert 0 < delta <= 1 / 2., f'delta must be in (0, 1/2]. Found delta={delta}'
    m, n = A.shape
    nn, p = B.shape
    assert n == nn, f'Dimension mismatch: cannot multiply {(m, n)} by {(nn, p)}.'

    # Step 1, Lines 1-2:
    # Compute lg(1/delta) tug-of-war matrices, S_i, using standard normal.
    N = norm(0, 1)
    beta = np.log2(1 / delta)
    S = epsilon * N.rvs(size=discretize((beta, 1 / epsilon**2, n)))

    # Step 2, Lines 3-4:
    # Compute lg(1/delta) x 2(lg(1/delta) + lg(lg(1/delta))) tug-of-war matrices,
    # Q_{i,j}, using standard normal.
    gamma = 2 * (beta + np.log2(beta))
    # If 1/epsilon^2 = 16, then epsilon = 1/4.
    Q = (1 / 4.) * N.rvs(size=discretize((beta, gamma, 16, p)))

    # Step 3a: Compute transpose of matrices first.
    St = np.transpose(S, axes=(0, 2, 1))
    Qt = np.transpose(Q, axes=(0, 1, 3, 2))

    # Step 3, Line 5:
    # Compute SB and AS^t for all S.
    SB, ASt = S @ B, A @ St

    # Step 4, Line 6:
    # Compute A(BQ^t) and then X = A(BQ^t) for all Q.
    BQt = B @ Qt
    ABQt = A @ BQt
    X = ABQt

    # Step 5, Line 7:
    # Compute (SB)Q^t and then Xhat = (AS^t)(SBQ^t).
    SBQt = np.einsum('hjk,hikl->hijl', SB, Qt)
    Xhat = np.einsum('hjk,hikl->hijl', ASt, SBQt)

    # Step 6, Line 8:
    # Compute y_{i,j} = ||X_{i,j}-Xhat{i,j}||_F^2.
    y = np.linalg.norm(X - Xhat, axis=(2, 3))**2

    # Step 7, Line 9:
    # Compute z_i = median of y_{i,j} over j.
    z = np.median(y, axis=1)

    # Step 8, Line 10:
    # Compute i^* = argmin z_i.
    i = np.argmin(z)

    return (ASt[i], SB[i]) if decompose else ASt[i] @ SB[i]
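A usage sketch, with one assumption made explicit: the discretize helper is not shown in the snippet, so a plausible stand-in that rounds the requested shape up to integers is defined here.

import numpy as np
from scipy.stats.distributions import norm

# Hypothetical stand-in for the discretize() helper the function relies on
discretize = lambda dims: tuple(int(np.ceil(d)) for d in dims)

A = np.random.randn(50, 40)
B = np.random.randn(40, 30)
C = mult(A, B, epsilon=0.25, delta=0.25)
err = np.linalg.norm(A @ B - C) / (np.linalg.norm(A) * np.linalg.norm(B))
print(err)  # below 2 * epsilon with probability at least 1 - delta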
Example #22
import csv
import pymbar
from pymbar import testsystems, MBAR, timeseries
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import copy
import os
import os.path
import optparse
import scipy
from optparse import OptionParser
from scipy.stats import distributions

normal1 = distributions.norm(3, 1.1)
normal2 = distributions.norm(7, 1)
uniform = distributions.uniform(0, 10)

gamma = distributions.gamma(6, 0.01)
x = np.linspace(0, 10, 100)
y1 = normal1.pdf(x) + normal2.pdf(x)
y2 = uniform.pdf(x)

plt.plot(x, y2)
plt.fill_between(x, 0, y2)
plt.title('Prior Distribution')
plt.ylim([0, 0.5])
plt.show()

plt.plot(x, y1 * y2)
Example #23
from pyDOE import *
from scipy.stats.distributions import norm

design = lhs(4, samples=10)
means = [1, 2, 3, 4]
stdvs = [0.1, 0.5, 1, 0.25]
for i in range(0,4):
    design[:, i] = norm(loc=means[i], scale=stdvs[i]).ppf(design[:, i])
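A quick sanity check, not part of the original snippet: with only 10 samples the agreement is rough, but the column means and standard deviations should land near the requested values.

print(design.mean(axis=0))  # roughly [1, 2, 3, 4]
print(design.std(axis=0))   # roughly [0.1, 0.5, 1, 0.25]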
Example #24
def normals_pdf(mu, sigma, p, obs):
    return sum([p_ * norm(mu_, sigma_).pdf(obs) for mu_, sigma_, p_ in zip(mu, sigma, p)])
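A standalone usage sketch with made-up component parameters; because the weights sum to one, the mixture density should integrate to roughly one.

import numpy as np
from scipy.stats.distributions import norm

obs = np.linspace(-10, 10, 2001)
mix = normals_pdf(mu=[-1.0, 2.0], sigma=[1.0, 0.5], p=[0.3, 0.7], obs=obs)
print(np.trapz(mix, obs))  # close to 1.0 for a properly weighted mixture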
Example #25
File: script.py Project: simvisage/spirrid
def main():
    # set the mean and standard deviation of la and xi
    m_la, std_la = 10.0, 1.0
    m_xi, std_xi = 1.0, 0.1
    # construct objects representing normal distributions
    pdistrib_la = norm(loc=m_la, scale=std_la)
    pdistrib_xi = norm(loc=m_xi, scale=std_xi)
    # get operators for probability density functions
    g_la = pdistrib_la.pdf
    g_xi = pdistrib_xi.pdf
    # number of integration points set equal for both variables
    n_i = 10
    # generate midpoints of n_i intervals in the range (-1,1)
    theta_arr = np.linspace(-(1.0 - 1.0 / n_i),
                              1.0 - 1.0 / n_i , n_i)
    # scale up theta_arr to cover the random domains
    theta_la = m_la + 4 * std_la * theta_arr
    theta_xi = m_xi + 4 * std_xi * theta_arr
    # get the size of the integration cells
    d_la = (8 * std_la) / n_i
    d_xi = (8 * std_xi) / n_i

    def Heaviside(x):
        """Heaviside function."""
        return x >= 0.0

    def q_eq13(eps, la, xi):
        """Response function of a single fiber."""
        return la * eps * Heaviside(xi - eps)

    def mu_q_eq13_loops(eps_arr):
        """Loop-based calculation of mean values."""
        mu_q_arr = np.zeros_like(eps_arr)
        for i, eps in enumerate(eps_arr):
            mu_q = 0.0
            for la in theta_la:
                for xi in theta_xi:
                    dG = g_la(la) * g_xi(xi) * d_la * d_xi
                    mu_q += q_eq13(eps, la, xi) * dG
            mu_q_arr[i] = mu_q
        return mu_q_arr

    # construct an array of control strains
    eps_arr = np.linspace(0, 1.2, 80)


    start_time = sysclock()
    mu_q_arr = mu_q_eq13_loops(eps_arr)
    print 'loop-based: elapsed time', sysclock() - start_time


    dG_la = g_la(theta_la) * d_la
    dG_xi = g_xi(theta_xi) * d_xi
    dG_grid = dG_la[:, np.newaxis] * dG_xi[np.newaxis, :]

    def mu_q_eq13(eps):
        """Loopless calculation of mean value."""
        q_grid = q_eq13(eps,
                         theta_la[:, np.newaxis],
                         theta_xi[np.newaxis, :])
        q_dG_grid = q_grid * dG_grid
        return np.sum(q_dG_grid)

    mu_q_eq13_vct = np.vectorize(mu_q_eq13)
    # eps_arr defined above is reused here
    start_time = sysclock()
    mu_q_arr = mu_q_eq13_vct(eps_arr)
    print 'Regular grid of random variables: elapsed time', sysclock() - start_time

    p.subplot(121)
    p.plot(eps_arr, mu_q_arr, color='blue', label='Tgrid')
    p.subplot(122)
    expander = np.ones((n_i, n_i), dtype=int)
    p.plot((theta_la[np.newaxis, :] * expander).flatten(),
            (theta_xi[:, np.newaxis] * expander).flatten(),
            'b.', label='Tgrid')


    def get_mu_q_fn(q, dG, *theta):
        """Return a method evaluating the mean of q()."""
        def mu_q(eps):
            Q_dG = q(eps, *theta) * dG
            return np.sum(Q_dG)
        return np.vectorize(mu_q)

    # SAMPLING: (*\label{line:TGrid_example_start}*)
    # ... reuse dG_grid and theta (lines (*\ref{line:theta_la}*), (*\ref{line:theta_xi}*) and (*\ref{line:g_la}*)-(*\ref{line:dG_grid}*))

    # INSTANTIATION:
    mu_q_fn = get_mu_q_fn(q_eq13, dG_grid,
                          theta_la[:, np.newaxis],
                          theta_xi[np.newaxis, :])

    # CALCULATION:
    mu_q_arr = mu_q_fn(eps_arr)

    # SAMPLING:
    # equidistant sampling probabilities (see Eq. (*\ref{eq:p_grid_sampling}*))
    j_arr = np.arange(1, n_i + 1)
    pi_arr = (j_arr - 0.5) / n_i
    # use ppf (percent point function) to get sampling points
    # (pdistrib_la and pdistrib_xi was defined at lines (*\ref{line:pdistrib_la}*), (*\ref{line:pdistrib_xi}*))
    theta_la = pdistrib_la.ppf(pi_arr)
    theta_xi = pdistrib_xi.ppf(pi_arr)
    # get the total number of integration points
    # for 2 random variables with equal n_i
    n_sim = n_i ** 2

    # INSTANTIATION:
    mu_q_fn = get_mu_q_fn(q_eq13, 1.0 / n_sim,
                          theta_la[:, np.newaxis],
                          theta_xi[np.newaxis, :])

    start_time = sysclock()
    # CALCULATION:
    mu_q_arr = mu_q_fn(eps_arr)
    print 'Grid of constant probabilities: elapsed time', sysclock() - start_time

    p.subplot(121)
    p.plot(eps_arr, mu_q_arr, color='cyan', label='Pgrid')
    p.subplot(122)
    p.plot((theta_la[np.newaxis, :] * expander).flatten(),
            (theta_xi[:, np.newaxis] * expander).flatten(),
            'co', label='Pgrid')


    # SAMPLING:
    # generate n_sim random realizations
    # using pdistrib objects (lines (*\ref{line:pdistrib_la}*), (*\ref{line:pdistrib_xi}*))
    theta_la_rvs = pdistrib_la.rvs(n_sim)
    theta_xi_rvs = pdistrib_xi.rvs(n_sim)

    # INSTANTIATION:
    mu_q_fn = get_mu_q_fn(q_eq13, 1.0 / n_sim,
                          theta_la_rvs, theta_xi_rvs)
    start_time = sysclock()
    # CALCULATION:
    mu_q_arr = mu_q_fn(eps_arr)
    print 'Monte-Carlo: elapsed time', sysclock() - start_time

    p.subplot(121)
    p.plot(eps_arr, mu_q_arr, color='red', label='Monte-Carlo')
    p.subplot(122)
    p.plot(theta_la_rvs, theta_xi_rvs, 'rD', label='Monte-Carlo')


    # SAMPLING: (*\label{line:LHS_example_start}*)
    # sampling probabilities (see Eq. (*\ref{eq:LHS_sampling}*)), n_sim as above
    j_arr = np.arange(1, n_sim + 1)
    pi_arr = (j_arr - 0.5) / n_sim
    # get the ppf values (percent point function)
    # using pdistrib objects defined at lines (*\ref{line:pdistrib_la}*), (*\ref{line:pdistrib_xi}*)
    theta_la_ppf = pdistrib_la.ppf(pi_arr)
    theta_xi_ppf = pdistrib_xi.ppf(pi_arr)
    # make random permutations of both arrays to diminish
    # correlation (not necessary for one of the random variables)
    theta_la = np.random.permutation(theta_la_ppf)
    theta_xi = theta_xi_ppf

    # INSTANTIATION:
    mu_q_fn = get_mu_q_fn(q_eq13, 1.0 / n_sim,
                          theta_la, theta_xi)
    start_time = sysclock()
    # CALCULATION:
    mu_q_arr = mu_q_fn(eps_arr)
    print 'Latin Hypercube Sampling: elapsed time', sysclock() - start_time

    p.subplot(121)
    p.plot(eps_arr, mu_q_arr, color='green', label='LHS')
    p.subplot(122)
    p.plot(theta_la, theta_xi, 'go', label='LHS')

    p.subplot(121)
    p.legend()
    p.xlabel('$\\varepsilon$', fontsize=24)
    p.ylabel('$q$', fontsize=24)


    ############################## Discretization grids ########################
    p.subplot(122)
    p.ylabel('$\\theta_{\\xi}$', fontsize=24)
    p.ylim(0.5, 1.5)
    p.xlim(5, 15)
    p.xlabel('$\\theta_{\lambda}$', fontsize=24)
    p.legend()

    p.show()
Example #26
File: glm.py Project: yohanJung/revrand
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_is_fitted, check_X_y, check_array
from sklearn.utils import check_random_state

from .utils import atleast_list, issequence
from .mathfun.special import logsumexp
from .basis_functions import LinearBasis, apply_grad
from .likelihoods import Gaussian
from .optimize import sgd, structured_sgd, logtrick_sgd, Adam
from .btypes import Bound, Positive, Parameter

# Set up logging
log = logging.getLogger(__name__)

# Module settings
WGTRND = norm()  # Sampling distribution over mixture weights
COVRND = gamma(a=2, scale=0.5)  # Sampling distribution over mixture covariance
LOGITER = 500  # Number of SGD iterations between logging ELBO and hypers


class GeneralizedLinearModel(BaseEstimator, RegressorMixin):
    r"""
    Bayesian Generalized linear model (GLM).

    This provides a scikit learn compatible interface for the glm module.

    Parameters
    ----------
    likelihood : Object
        A likelihood object, see the likelihoods module.
    basis : Basis
Example #27
def normatval(mu, sigma, x):
    normdist = dist.norm()
    return normdist.pdf((x-mu)/sigma)
Example #28
        s = self.discrete_rv
        x = self.continuous_rv

        for k in range(len(s.xk)):
            F = F + x.cdf(z - s.xk[k]) * s.pk[k]
        return F

    def _pdf(self, z):
        f = 0
        s = self.discrete_rv
        x = self.continuous_rv

        for k in range(len(s.xk)):
            f = f + x.pdf(z - s.xk[k]) * s.pk[k]
        return f


if __name__ == "__main__":  # If running this code instead of importing...
    x = iid.norm()  # Create continuous rv

    Omega = (-1, 0, 1)  # Sample space for discrete rvs

    # Create two discrete rvs
    r = iid.rv_discrete(values=(Omega, (1 / 3., 1 / 2., 1 / 6.)))
    s = iid.rv_discrete(values=(Omega, (5 / 6., 1 / 12., 1 / 12.)))

    # Create new convolved rv:
    y = ConvolvedContinuousAndDiscrete(x, s)

    t = ConvolvedDiscrete(r, s)
Example #29
    def __data_inverse(self, data_row, num_samples, sampling_method):
        """Generates a neighborhood around a prediction.

        For numerical features, perturb them by sampling from a Normal(0,1) and
        doing the inverse operation of mean-centering and scaling, according to
        the means and stds in the training data. For categorical features,
        perturb by sampling according to the training distribution, and making
        a binary feature that is 1 when the value is the same as the instance
        being explained.

        Args:
            data_row: 1d numpy array, corresponding to a row
            num_samples: size of the neighborhood to learn the linear model
            sampling_method: 'gaussian' or 'lhs'

        Returns:
            A tuple (data, inverse), where:
                data: dense num_samples * K matrix, where categorical features
                are encoded with either 0 (not equal to the corresponding value
                in data_row) or 1. The first row is the original instance.
                inverse: same as data, except the categorical features are not
                binary, but categorical (as the original data)
        """
        is_sparse = sp.sparse.issparse(data_row)
        if is_sparse:
            num_cols = data_row.shape[1]
            data = sp.sparse.csr_matrix((num_samples, num_cols),
                                        dtype=data_row.dtype)
        else:
            num_cols = data_row.shape[0]
            data = np.zeros((num_samples, num_cols))
        categorical_features = range(num_cols)
        if self.discretizer is None:
            instance_sample = data_row
            scale = self.scaler.scale_
            mean = self.scaler.mean_
            if is_sparse:
                # Perturb only the non-zero values
                non_zero_indexes = data_row.nonzero()[1]
                num_cols = len(non_zero_indexes)
                instance_sample = data_row[:, non_zero_indexes]
                scale = scale[non_zero_indexes]
                mean = mean[non_zero_indexes]

            if sampling_method == 'gaussian':
                data = self.random_state.normal(
                    0, 1,
                    num_samples * num_cols).reshape(num_samples, num_cols)
                data = np.array(data)
            elif sampling_method == 'lhs':
                data = lhs(num_cols,
                           samples=num_samples).reshape(num_samples, num_cols)
                means = np.zeros(num_cols)
                stdvs = np.array([1] * num_cols)
                for i in range(num_cols):
                    data[:, i] = norm(loc=means[i],
                                      scale=stdvs[i]).ppf(data[:, i])
                data = np.array(data)
            else:
                warnings.warn(
                    '''Invalid input for sampling_method.
                                 Defaulting to Gaussian sampling.''',
                    UserWarning)
                data = self.random_state.normal(
                    0, 1,
                    num_samples * num_cols).reshape(num_samples, num_cols)
                data = np.array(data)

            if self.sample_around_instance:
                data = data * scale + instance_sample
            else:
                data = data * scale + mean
            if is_sparse:
                if num_cols == 0:
                    data = sp.sparse.csr_matrix(
                        (num_samples, data_row.shape[1]), dtype=data_row.dtype)
                else:
                    indexes = np.tile(non_zero_indexes, num_samples)
                    indptr = np.array(
                        range(0,
                              len(non_zero_indexes) * (num_samples + 1),
                              len(non_zero_indexes)))
                    data_1d_shape = data.shape[0] * data.shape[1]
                    data_1d = data.reshape(data_1d_shape)
                    data = sp.sparse.csr_matrix(
                        (data_1d, indexes, indptr),
                        shape=(num_samples, data_row.shape[1]))
            categorical_features = self.categorical_features
            first_row = data_row
        else:
            first_row = self.discretizer.discretize(data_row)
        data[0] = data_row.copy()
        inverse = data.copy()
        for column in categorical_features:
            values = self.feature_values[column]
            freqs = self.feature_frequencies[column]
            inverse_column = self.random_state.choice(values,
                                                      size=num_samples,
                                                      replace=True,
                                                      p=freqs)
            binary_column = (inverse_column == first_row[column]).astype(int)
            binary_column[0] = 1
            inverse_column[0] = data[0, column]
            data[:, column] = binary_column
            inverse[:, column] = inverse_column
        if self.discretizer is not None:
            inverse[1:] = self.discretizer.undiscretize(inverse[1:])
        inverse[0] = data_row
        return data, inverse
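The two sampling branches ('gaussian' and 'lhs') can be compared outside the class. A minimal sketch, assuming pyDOE's lhs and scipy's norm as used elsewhere on this page:

import numpy as np
from pyDOE import lhs
from scipy.stats.distributions import norm

num_samples, num_cols = 1000, 3
gauss = np.random.normal(0, 1, num_samples * num_cols).reshape(num_samples, num_cols)
strat = norm(0, 1).ppf(lhs(num_cols, samples=num_samples))
# both are standard normal per column, but the LHS version is stratified over the quantiles
print(gauss.std(axis=0), strat.std(axis=0))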
Example #30
# Hang on to the original values for comparison
nominal_values = np.array([p.value for p in p_to_fit])
x_test = np.log10(nominal_values)

print "True values (in log10 space):", x_test
print "Nominal error:", obj_func(x_test)

# Pick a starting point; in practice this would be random selected by
# a sampling strategy (e.g., latin hypercube sampling) or from a prior
# distribution
num_rand = 6
design = lhs(len(p_to_fit), samples=num_rand / len(p_to_fit))
means = x_test
stdvs = np.array([0.1, 0.1, 0.1])
for alp in range(len(p_to_fit)):
    design[:, alp] = norm(loc=means[alp], scale=stdvs[alp]).ppf(design[:, alp])

#Create a list of methods

meth_list = [
    'Nelder-Mead', 'Powell', 'COBYLA', 'TNC', 'L-BFGS-B', 'CG', 'BFGS',
    'SLSQP', 'trust-ncg', 'Newton-CG'
]
met_list = ['dogleg']

#Create arrays for storing no. of function evaluations and objective function values

func_eval = np.zeros((len(meth_list), num_rand / len(p_to_fit)))
obj_val = np.zeros((len(meth_list), num_rand / len(p_to_fit)))

#Run the minimization algorithm for each initial value
Example #31
def AR1_logpdf(value, k, tau_e):
    return (sp.norm(loc=0, scale=1 / np.sqrt(tau_e)).logpdf(value[0]) +
            sp.norm(loc=k * value[:-1], scale=1 / np.sqrt(tau_e)).logpdf(
                value[1:]).sum())
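A usage sketch, assuming sp is scipy.stats as the snippet implies: simulate a short AR(1) path and evaluate its joint log density.

import numpy as np
from scipy import stats as sp

k, tau_e = 0.8, 4.0                     # AR coefficient and error precision
e = sp.norm(0, 1 / np.sqrt(tau_e)).rvs(200)
value = np.empty(200)
value[0] = e[0]
for t in range(1, 200):
    value[t] = k * value[t - 1] + e[t]  # AR(1) recursion
print(AR1_logpdf(value, k, tau_e))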
Example #32
File: kde.py Project: xBau/post-its
                    fresh[this_index] = this
                    if that_index in fresh:
                        del fresh[that_index]
                else:
                    fresh[that_index] = that
                    if this_index in fresh:
                        del fresh[this_index]
                found = True
                break
        if not found:
            fresh[this_index] = this

    result = []
    for i in fresh:
        result.append(x[i])
        # result.append(list(i.exterior.coords)[:-1])

    return result


if __name__ == '__main__':
    x = np.concatenate([norm(1000, 1.).rvs(400), norm(20000, 1.).rvs(100)])
    print x
    print mode(x)

    af = AffinityPropagation(preference=-50)
    af.fit(x[:, np.newaxis])
    print dir(af)
    print af.cluster_centers_indices_
    print len(af.cluster_centers_indices_)
Example #33
def gaussian(mu=0, sd=1):
    return D.norm(mu, sd)
Example #34
def gaussian(mu=0,sd=1):
    return D.norm(mu,sd)
Example #35
def design_lhs_exp(variables, maps, offsets=None, samples=int(1e4), project_linear=True):
    """ Design an LHS experiment """

    design = lhs(len(variables), samples=samples, criterion="m", iterations=100)
    z_design = np.zeros_like(design)

    print "Computing LHS design..."
    if project_linear:
        print "   using linear re-projection for log variables"
    else:
        print "   using original variable coordinate"
    for i, v in enumerate(variables):
        dist, a, b = v[3]

        if project_linear:  # Re-sample in linear space
            if v[0].startswith("ln"):
                ## 9/4/2014
                ## This is an experimental correction to re-project the
                ## logarithmic variables into their normal coordinate
                ## system. It should only affect the sampling, and hopefully
                ## improve it by forcing it to even things out over the
                ## actual range we care about
                a = np.exp(a)
                b = np.exp(b)
                offsets[i] = np.exp(offsets[i])

            elif v[0].startswith("log"):
                ## 10/26/2014
                ## In accordance with above, but for log10 vars
                a = 10.0 ** a
                b = 10.0 ** b
                offsets[i] = 10.0 ** offsets[i]

        if offsets:
            ## These corrections with "offsets" re-center the interval
            ## so that the left endpoint is 0. I found that if arbitrary
            ## lower/upper limits were used, sometimes the PPF routines
            ## would really mess up in inverting the CDF.
            a, b = a - offsets[i], b - offsets[i]
        if dist == "uniform":
            design[:, i] = uniform(a, b).ppf(design[:, i])
        elif dist == "normal":
            design[:, i] = norm(a, b).ppf(design[:, i])
        elif dist == "loguniform":
            design[:, i] = loguni_ppf(design[:, i], a, b)
        else:
            raise ValueError("no dist defined for %s" % dist)

        if offsets:
            ## Project back in to the correct limits
            design[:, i] += offsets[i]
            a, b = a + offsets[i], b + offsets[i]

        if project_linear:
            if v[0].startswith("ln"):
                ## 9/4/2014
                ## Second half of correction
                a = np.log(a)
                b = np.log(b)
                design[:, i] = np.log(design[:, i])
            elif v[0].startswith("log"):
                ## 10/26/2014
                a = np.log10(a)
                b = np.log10(b)
                design[:, i] = np.log10(design[:, i])

        z_design[:, i] = maps[i](design[:, i], a, b)
    design = design.T  # in x-coords
    z_design = z_design.T

    return design, z_design
Example #36
def nestedSimulationEX1(Ndata, K1=100, tn=50, n_repeat=1, sampling='mc'):
	''' Generation of nested simulation samples at the time horizon.
            Single asset example in Broadie et al (2014)

    by L. Wang

    Tested.

	Parameters
	==========
	Ndata : integer
		no. of training samples
	K1 : integer
		no. of inner loops
	tn : integer
		no. of time steps in between t = \tau and T
	n_repeat : integer
		no. of iterations for computing expectations
	sampling : string
		'mc' for plain Monte Carlo or 'lhs' for Latin hypercube sampling

	Returns
	=======
	Stau : Ndata x 1 nparray
		realizations of underlying price at t = \tau
        Loss : Ndata x n_repeat nparray
                portfolio loss of corresponding Stau
        t_prep : float
                time spent in this procedure
	'''
	import time
	#from doe_lhs import lhs
	from scipy.stats.distributions import norm

    	# --- Parameters ---
	
	S0 = 100.; mu = 0.08; sigma = 0.2
	rfr = 0.03; T = 1./12.; tau = 1./52.
	K = np.array([101., 110., 114.5])
	H = np.array([91., 100., 104.5])
	pos = np.array([1., 1., -1.])
	n = len(K)    # number of options
	
	t0 = time.time()    # timer starts

	# --- portfolio price @ t = 0 ---
	V0 = np.zeros(n)
	for i in range(n):
	    V0[i] = rear_end_dnOutPut(S0, K[i], rfr, T, sigma, H[i], tau, q=0.)
	Value0 = np.sum(pos * V0)
	
	# --- portfolio loss distribution @ t = \tau ---
	
	# draw samples and generates real-world scenarios
	if sampling == 'mc':
	    sn = npr.standard_normal((Ndata, 1))    # be careful of the vector size
	elif sampling == 'lhs':
		sn = lhs(1,samples=Ndata); sn = norm(loc=0,scale=1).ppf(sn)

	
	Stau = np.zeros((Ndata, 1))
	Stau[:] = S0
	Stau = Stau * np.exp((mu - 0.5 * sigma ** 2) * tau + sigma * np.sqrt(tau) * sn)

	if n_repeat == 1:
		Vtau = np.zeros((Ndata, n))
		ValueTau = np.zeros(Ndata)   # be careful of the vector size
		for i in range(Ndata):
			for j in range(n):
				Vtau[i][j] = dnOutPut_nmcs(Stau[i], K[j], rfr, T-tau, sigma, H[j], M=tn, I=K1)
			ValueTau[i] = np.sum(pos * Vtau[i])
	else:
	    ValueTau = np.zeros((Ndata,n_repeat))   # be careful of the vector size
	    for i in range(n_repeat):
	    	Vtau = np.zeros((Ndata, n))
	    	for j in range(Ndata):
	    		for k in range(n):
	    			Vtau[j][k] = dnOutPut_nmcs(Stau[j], K[k], rfr, T-tau, sigma, H[k], M=tn, I=K1)
	    		ValueTau[j][i] = np.sum(pos * Vtau[j])
	Loss = Value0 - ValueTau
    
    
    
	t_prep = time.time() - t0    # timer off

	return Stau, Loss, t_prep
Example #37
# set the mean and the range of the sampling
c_mean, c_stdev = 1.0, 0.2
x_mean, x_stdev = 10.0, 2.0

# generate arrays of sampling values
c_arr = linspace( c_mean - ( 4 * c_stdev ), c_mean + ( 4 * c_stdev ), n_int )
x_arr = linspace( x_mean - ( 4 * x_stdev ), x_mean + ( 4 * x_stdev ), n_int )

# grid distances
dc = ( c_arr[-1] - c_arr[0] ) / n_int
dx = ( x_arr[-1] - x_arr[0] ) / n_int

# construct the normal distribution and get the method
# for the evaluation of the probability density
pdf_c = norm( loc = c_mean, scale = c_stdev ).pdf
pdf_x = norm( loc = x_mean, scale = x_stdev ).pdf

def Heaviside( x ):
    ''' Heaviside function '''
    #@TODO: same as definition
    return ( sign( x ) + 1.0 ) / 2.

def q( e, c, x ):
    ''' Response function of a single fiber '''
    return c * e * Heaviside( x - e )

# prepare the sequence of the control strains
# evaluate the response for an array of values of the control variable
e_arr = linspace( 0, 20, 100 )
# define an array of the same size as e_arr
Example #38
File: sampling.py Project: jellelang/IM
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 16 11:04:04 2015

@author: jelle
"""



import sys,os
path='C:/PostDoc/Python/IM/BASICS/pyDOE-0.3.7'
sys.path.append(path)
from pyDOE import *
from scipy.stats.distributions import norm

# basic case: this is for a single distribution
lhd = lhs(2, samples=5)
lhd = norm(loc=0, scale=1).ppf(lhd)  # this applies to both factors here



# four distributions; generate a series of 20 samples
design = lhs(4, samples=20)
from scipy.stats.distributions import norm
means = [1, 2, 3, 4]
stdvs = [0.1, 0.5, 1, 0.25]
for i in xrange(4):
    design[:, i] = norm(loc=means[i], scale=stdvs[i]).ppf(design[:, i])

Example #39
from scipy.stats.distributions import norm, uniform # import normal distribution
import pylab as p  # import matplotlib with matlab interface
import numpy as np # import numpy package
from time import clock

if __name__ == '__main__':
    n_rv = 2 # number of random variables
    n_int = 5 # number of discretization points

    # set the mean and standard deviation of the two random variables
    m_la, std_la = 10.0, 1.0
    m_xi, std_xi = 1.0, 0.1

    # construct the normal distributions and get the methods
    # for the evaluation of the probability density functions
    g_la = norm(loc = m_la, scale = std_la)
    g_xi = norm(loc = m_xi, scale = std_xi)

    # discretize the range (-1,1) symmetrically with n_int points
    theta_arr = np.linspace(-(1.0 - 1.0 / n_int), 1.0 - 1.0 / n_int , n_int)
    # cover the random variable symmetrically around the mean 
    theta_la = m_la + 4 * std_la * theta_arr
    theta_xi = m_xi + 4 * std_xi * theta_arr
    # get the size of the integration cell
    d_la = (8 * std_la) / n_int
    d_xi = (8 * std_xi) / n_int

    def Heaviside(x):
        ''' Heaviside function '''
        return (np.sign(x) + 1.0) / 2.0
Example #40
File: lhs_run.py Project: PySFE/pySFE
rnd_array_6 = np.array(rnd_array_6)

#   Calculate gumbel parameters
qfd_scale = (qfd_std * (6**0.5)) / np.pi
qfd_loc = qfd_mean - (0.5722 * qfd_scale)

#   Near field standard deviation
std_nft = (1.939 - (np.log(avg_nft) * 0.266)) * avg_nft

#   Convert LHS probabilities to distribution invariants
comb_lhs = linear_dist(com_eff_min, com_eff_max, rnd_array_4)
qfd_lhs = gumbel_r(loc=qfd_loc, scale=qfd_scale).ppf(rnd_array_1) * comb_lhs
glaz_lhs = linear_dist(glaz_min, glaz_max, rnd_array_2)
beam_lhs = linear_dist(beam_min, beam_max, rnd_array_3) * depth
spread_lhs = linear_dist(spread_min, spread_max, rnd_array_5)
nft_lhs = norm(loc=avg_nft, scale=std_nft).ppf(rnd_array_6)

#   initialise output arrays
peak_st_fract = []
peak_st_temp = []

for i in range(0, lhs_iterations):
    dict_inputs = {
        "window_height": win_height,
        "window_width": win_width,
        "window_open_fraction": glaz_lhs[i],
        "room_breadth": breadth,
        "room_depth": depth,
        "room_height": height,
        "fire_load_density": qfd_lhs[i],
        "fire_hrr_density": hrr_pua,
Example #41
def normatval(mu, sigma, x):
    normdist = dist.norm()
    return normdist.pdf((x - mu) / sigma)
Example #42
    def distribution(self, t: float) -> rv_frozen:
        return norm(self.mu * t, self.sigma * np.sqrt(t))
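This is the marginal law of a Brownian motion with drift at time t. A standalone sketch with illustrative mu and sigma values, not taken from the original class:

import numpy as np
from scipy.stats.distributions import norm

mu, sigma, t = 0.05, 0.2, 2.0
marginal = norm(mu * t, sigma * np.sqrt(t))  # same construction as distribution(t)
print(marginal.mean(), marginal.std())       # 0.1 and sigma * sqrt(2)
print(marginal.ppf([0.05, 0.5, 0.95]))       # quantiles at time t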
Example #43
from numpy import *
import pymc
from scipy import stats
from scipy.stats import distributions as d

# parameters describing the data
dimensions = 100
observations = 100
shape = (dimensions, observations)

data = d.norm(  loc = 0, scale = 1).rvs((dimensions, observations))
def model_gen():
    
    variables = []
    
    means = pymc.Normal("means",mu = zeros(dimensions), tau = ones(dimensions))

    sds = pymc.Gamma("sds", alpha = ones(dimensions) * 1 , beta = ones(dimensions) * 1)

    
    variables.append(means)
    variables.append (sds)
    
    @pymc.deterministic
    def precisions ( stdev = sds):
        precisions = (ones(shape) * (stdev**-2)[:, newaxis]).ravel()
    
        return precisions
    
    @pymc.deterministic
    def obsMeans (means = means):
Example #44
import platform
import time

if platform.system() == 'Linux':
    sysclock = time.time
elif platform.system() == 'Windows':
    sysclock = time.clock

if __name__ == '__main__':
    n_rv = 2
    # set the mean and standard deviation of the two random variables
    m_la, std_la = 10.0, 1.0
    m_xi, std_xi = 1.0, 0.1

    # construct objects representing normal distribution
    pdistrib_la = norm(loc = m_la, scale = std_la)
    pdistrib_xi = norm(loc = m_xi, scale = std_xi)

    # for the evaluation of the probability density functions
    g_la = pdistrib_la.pdf
    g_xi = pdistrib_xi.pdf

    n_int = 10 # number of discretization points

    # discretize the range (-1,1) symmetrically with n_int points
    theta_arr = np.linspace(-(1.0 - 1.0 / n_int), 1.0 - 1.0 / n_int , n_int)

    # cover the random variable symmetrically around the mean 
    theta_la = m_la + 4 * std_la * theta_arr
    theta_xi = m_xi + 4 * std_xi * theta_arr
Example #45
from pyDOE import *
from scipy.stats.distributions import norm

# Latin Hypercube Sampling
# see: https://pythonhosted.org/pyDOE/randomized.html

# Run LHS for n factors
X = lhs(4, samples=100)  # lhs(n, [samples, criterion, iterations])

# Transform factors to normal distributions with means and standard deviations
means = [1, 2, 3, 4]
stdvs = [0.1, 0.5, 1, 0.25]
for i in range(4):
    X[:, i] = norm(loc=means[i], scale=stdvs[i]).ppf(X[:, i])
Example #46
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
import numpy as np
from scipy.stats.distributions import norm
x = np.concatenate([norm(-1, 1.).rvs(400),norm(1, 0.3).rvs(100)])
y = np.concatenate([norm(0, 1.).rvs(400),norm(1, 0.3).rvs(100)])
# np.linspace returns evenly spaced candidate bandwidths; bandwidths must be positive
grid = GridSearchCV(KernelDensity(),
                    {'bandwidth': np.linspace(0.1, 1.0, 100)},
                    cv=20)  # 20-fold cross-validation
grid.fit(x[:, None])
print(grid.best_params_)
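As a follow-up, the selected bandwidth can be used directly: best_estimator_ is the KernelDensity instance refit by GridSearchCV with the winning bandwidth, and score_samples returns the log density.

kde = grid.best_estimator_
x_plot = np.linspace(-4, 4, 200)
log_dens = kde.score_samples(x_plot[:, None])
density = np.exp(log_dens)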
Example #47
import pymc3 as pm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import distributions as dist
import theano
import arviz as az
import warnings

warnings.filterwarnings('ignore')

x = np.array([1, 5, 8])
y = 1 + x + dist.norm(0, 1.5).rvs(3)
y = y.reshape(-1, 1)
x = x.reshape(-1, 1)
xs = x
ys = y
print(x.shape)

initial_w = dist.uniform(0, 1).rvs(1)

with pm.Model() as NN:
    x = pm.Data('x', x)
    y = pm.Data('y', y)
    w = pm.Normal('w', mu=0, sigma=10, shape=[1, 1], testval=initial_w)
    l1 = pm.Deterministic('l1', pm.math.dot(x, w))
    y_l = pm.Normal('y_l', l1, observed=y)
    trace = pm.sample(cores=1)
    pp = pm.sample_posterior_predictive(trace, random_seed=123)

plt.plot(xs, pp['y_l'].mean(axis=0).reshape(1, -1).flatten())
Example #48
def sample_pspace(model, param_list=None, bounds=None, samples=100, seed=None):
    """
    A DataFrame where each row represents a location in the parameter
    space, locations distributed to exercise the full range of values
    that each parameter can take on.

    This is useful for quick and dirty application of tests to a bunch
    of locations in the sample space. Kind-of a fuzz-testing for
    the model.

    Uses latin hypercube sampling, with random values within
    the sample bins. The LHS sampler shuffles the bins each time,
    so a subsequent call will yield a different sample from the
    parameter space.

    When a variable has both upper and lower bounds, use a uniform
    sample between those bounds.

    When a variable has only one bound, use an exponential distribution
    with the scale set to be the difference between the bound and the
    current model value (1 if they are the same)

    When the variable has neither bound, use a normal distribution centered
    on the current model value, with scale equal to the absolute value
    of the model value (1 if that magnitude is 0)

    Parameters
    ----------
    model: pysd.Model object

    param_list: None or list of strings
        The real names of parameters to include in the explored parameter
        space.
        If None, uses all of the constants in the model except TIME STEP,
        INITIAL TIME, etc.

    bounds: DataFrame, string filename, or None
        A range test matrix as used for bounds checking.
        If None, creates one from the model
        These bounds can also place artificial limits on the
        parameter space you want to explore, even if the theoretical
        bounds on the variable are infinite.

    samples: int
        The number of samples to include in the iterator.
    seed: int, optional
        Seed for numpy's random number generator, so the sample can be reproduced.

    Returns
    -------
    lhs : pandas DataFrame
        distribution-weighted latin hypercube samples

    Note
    ----
    Executes the model by 1 time-step to get the current value of parameters.

    """
    if param_list is None:
        doc = model.doc()
        param_list = sorted(list(set(doc[doc['Type'] == 'constant']['Real Name']) -
                            {'FINAL TIME', 'INITIAL TIME', 'TIME STEP', 'TIME STEP'}))

    if isinstance(bounds, _pd.DataFrame):
        bounds = bounds.set_index('Real Name')
    elif bounds is None:
        bounds = create_bounds_test_matrix(model).set_index('Real Name')
    elif isinstance(bounds, str):
        if bounds.split('.')[-1] in ['xls', 'xlsx']:
            bounds = _pd.read_excel(bounds, sheetname='Bounds', index_col='Real Name')
        elif bounds.split('.')[-1] == 'csv':
            bounds = _pd.read_csv(bounds, index_col='Real Name', encoding='UTF-8')
        elif bounds.split('.')[-1] == 'tab':
            bounds = _pd.read_csv(bounds, sep='\t', index_col='Real Name', encoding='UTF-8')
        else:
            raise ValueError('Unknown file type: bounds')
    else:
        raise ValueError('Unknown type: bounds')

    if seed is not None:
        _np.random.seed(seed)

    unit_lhs = _pd.DataFrame(_pyDOE.lhs(n=len(param_list), samples=samples),
                             columns=param_list)  # raw latin hypercube sample

    res = model.run(return_timestamps=[model.components.initial_time()])
    lhs = _pd.DataFrame(index=unit_lhs.index)
    for param in param_list:
        lower, upper = bounds[['Min', 'Max']].loc[param]
        value = res[param].iloc[0]

        if lower == upper:
            lhs[param] = lower

        elif _np.isfinite(lower) and _np.isfinite(upper):  # np.isfinite(0)==True
            scale = upper - lower
            lhs[param] = _dist.uniform(lower, scale).ppf(unit_lhs[param])

        elif _np.isfinite(lower) and _np.isinf(upper):
            if lower == value:
                scale = 1
            else:
                scale = value - lower
            lhs[param] = _dist.expon(lower, scale).ppf(unit_lhs[param])

        elif _np.isinf(lower) and _np.isfinite(upper):  # np.isinf(-np.inf)==True
            if upper == value:
                scale = 1
            else:
                scale = upper - value
            lhs[param] = upper - _dist.expon(0, scale).ppf(unit_lhs[param])

        elif _np.isinf(lower) and _np.isinf(upper):  # np.isinf(-np.inf)==True
            if value == 0:
                scale = 1
            else:
                scale = abs(value)
            lhs[param] = _dist.norm(value, scale).ppf(unit_lhs[param])

        else:
            raise ValueError('Problem with lower: %s or upper: %s bounds' % (lower, upper))

    return lhs
Example #49
def get_ls_factor(n_uncertain, n_samples, pc_order, lamb=0.0):
    # Uncertain parameter design
    sobol_design = sobol_seq.i4_sobol_generate(n_uncertain, n_samples,
                                               ceil(np.log2(n_samples)))
    sobol_samples = np.transpose(sobol_design)
    for i in range(n_uncertain):
        sobol_samples[i, :] = norm(loc=0., scale=1).ppf(sobol_samples[i, :])

    # Polynomial function definition
    x = SX.sym('x')
    he0fcn = Function('He0fcn', [x], [1.])
    he1fcn = Function('He1fcn', [x], [x])
    he2fcn = Function('He2fcn', [x], [x**2 - 1])
    he3fcn = Function('He3fcn', [x], [x**3 - 3 * x])
    he4fcn = Function('He4fcn', [x], [x**4 - 6 * x**2 + 3])
    he5fcn = Function('He5fcn', [x], [x**5 - 10 * x**3 + 15 * x])
    he6fcn = Function('He6fcn', [x], [x**6 - 15 * x**4 + 45 * x**2 - 15])
    he7fcn = Function('He7fcn', [x], [x**7 - 21 * x**5 + 105 * x**3 - 105 * x])
    he8fcn = Function('He8fcn', [x],
                      [x**8 - 28 * x**6 + 210 * x**4 - 420 * x**2 + 105])
    he9fcn = Function('He9fcn', [x],
                      [x**9 - 36 * x**7 + 378 * x**5 - 1260 * x**3 + 945 * x])
    he10fcn = Function(
        'He10fcn', [x],
        [x**10 - 45 * x**8 + 630 * x**6 - 3150 * x**4 + 4725 * x**2 - 945])
    helist = [
        he0fcn, he1fcn, he2fcn, he3fcn, he4fcn, he5fcn, he6fcn, he7fcn, he8fcn,
        he9fcn, he10fcn
    ]

    # Calculation of factor for least-squares
    xu = SX.sym("xu", n_uncertain)
    exps = (p for p in product(range(pc_order + 1), repeat=n_uncertain)
            if sum(p) <= pc_order)
    next(exps)
    exps = list(exps)

    psi = SX.ones(
        int(
            factorial(n_uncertain + pc_order) /
            (factorial(n_uncertain) * factorial(pc_order))))
    for i in range(len(exps)):
        for j in range(n_uncertain):
            psi[i + 1] *= helist[exps[i][j]](xu[j])
    psi_fcn = Function('PSIfcn', [xu], [psi])

    nparameter = SX.size(psi)[0]
    psi_matrix = SX.zeros(n_samples, nparameter)
    for i in range(n_samples):
        psi_a = psi_fcn(sobol_samples[:, i])
        for j in range(SX.size(psi)[0]):
            psi_matrix[i, j] = psi_a[j]

    psi_t_psi = mtimes(psi_matrix.T, psi_matrix) + lamb * DM.eye(nparameter)
    chol_psi_t_psi = chol(psi_t_psi)
    inv_chol_psi_t_psi = solve(chol_psi_t_psi, SX.eye(nparameter))
    inv_psi_t_psi = mtimes(inv_chol_psi_t_psi, inv_chol_psi_t_psi.T)

    ls_factor = mtimes(inv_psi_t_psi, psi_matrix.T)
    ls_factor = DM(ls_factor)

    # Calculation of expectations for variance function
    n_sample_expectation_vector = 100000
    x_sample = np.random.multivariate_normal(np.zeros(n_uncertain),
                                             np.eye(n_uncertain),
                                             n_sample_expectation_vector)
    psi_squared_sum = DM.zeros(SX.size(psi)[0])
    for i in range(n_sample_expectation_vector):
        psi_squared_sum += psi_fcn(x_sample[i, :])**2
    expectation_vector = psi_squared_sum / n_sample_expectation_vector

    return ls_factor, expectation_vector, psi_fcn
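The HeN functions above are the probabilists' Hermite polynomials, which are orthogonal under the standard normal weight with squared norm n!. A quick Monte-Carlo check in plain numpy, independent of casadi:

import numpy as np

x = np.random.standard_normal(200000)
he2 = x**2 - 1
he3 = x**3 - 3 * x
print(np.mean(he2 * he3))  # ~0: different orders are orthogonal
print(np.mean(he2**2))     # ~2 = 2!, the squared norm of He2
print(np.mean(he3**2))     # ~6 = 3!, the squared norm of He3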