def rf(n, df1, df2, ncp=0):
    """Draw n random variates from the (possibly non-central) F-distribution,
    mimicking R's ``rf`` function."""
    from scipy.stats import f, ncf
    if ncp == 0:
        # central F-distribution
        result = f.rvs(size=n, dfn=df1, dfd=df2, loc=0, scale=1)
    else:
        # non-central F-distribution with non-centrality parameter ncp
        result = ncf.rvs(size=n, dfn=df1, dfd=df2, nc=ncp, loc=0, scale=1)
    return result
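# A minimal usage sketch for rf above; the sample size and degrees of freedom
# below are arbitrary illustration values, not taken from the original.
central = rf(5, df1=3, df2=10)               # central F variates
noncentral = rf(5, df1=3, df2=10, ncp=2.5)   # non-central F variates
print(central)
print(noncentral)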
def rvs(t, delta_L, df1=100, df2=100):
    from math import exp
    from scipy.stats import f
    # F-distributed random scaling factor
    random_part = f.rvs(df1, df2)
    add = delta_L + t
    # deterministic target: add itself if above 1, otherwise exp(add - 1)
    non_random_part = add if add > 1 else exp(add - 1)
    print(random_part, 1 / random_part)
    # move from t toward the target, scaled by the random factor
    return t + random_part * (non_random_part - t)
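# A hedged example call for rvs above; the argument values are illustrative
# only and are not from the original.
new_t = rvs(t=0.5, delta_L=0.2, df1=100, df2=100)
print(new_t)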
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import f

# Example shape parameters (degrees of freedom); the values are illustrative
dfn, dfd = 29, 18
fig, ax = plt.subplots(1, 1)

# Display the probability density function (``pdf``):
x = np.linspace(f.ppf(0.01, dfn, dfd), f.ppf(0.99, dfn, dfd), 100)
ax.plot(x, f.pdf(x, dfn, dfd), 'r-', lw=5, alpha=0.6, label='f pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = f(dfn, dfd)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = f.ppf([0.001, 0.5, 0.999], dfn, dfd)
np.allclose([0.001, 0.5, 0.999], f.cdf(vals, dfn, dfd))  # True

# Generate random numbers:
r = f.rvs(dfn, dfd, size=1000)

# And compare the histogram:
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
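# As a quick complement to the plot above, the first four moments of the same
# F-distribution can be read off directly; a minimal sketch reusing dfn, dfd.
mean, var, skew, kurt = f.stats(dfn, dfd, moments='mvsk')
print(mean, var, skew, kurt)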
import numpy as np
from scipy.stats import (norm, t, laplace, logistic, f as F, gamma, lognorm,
                         invgauss, pareto, beta, poisson)


def bootstrap(a, f=None, b=100, method="balanced", family=None, strata=None,
              smooth=False, random_state=None):
    """
    Calculate function values from bootstrap samples or
    optionally return bootstrap samples themselves.

    Parameters
    ----------
    a : array-like
        Original sample
    f : callable or None
        Function to be bootstrapped
    b : int
        Number of bootstrap samples
    method : string
        * 'ordinary'
        * 'balanced'
        * 'parametric'
    family : string or None
        * 'gaussian'
        * 't'
        * 'laplace'
        * 'logistic'
        * 'F'
        * 'gamma'
        * 'log-normal'
        * 'inverse-gaussian'
        * 'pareto'
        * 'beta'
        * 'poisson'
    strata : array-like or None
        Stratification labels, ignored when method is parametric
    smooth : boolean
        Whether or not to add noise to bootstrap samples,
        ignored when method is parametric
    random_state : int or None
        Random number seed

    Returns
    -------
    y | X : np.array
        Function applied to each bootstrap sample,
        or the bootstrap samples themselves if f is None
    """
    np.random.seed(random_state)
    a = np.asarray(a)
    n = len(a)

    # stratification not meaningful for parametric sampling
    if strata is not None and (method != "parametric"):
        strata = np.asarray(strata)
        if len(strata) != len(a):
            raise ValueError("a and strata must have the same length")
        # recursively call bootstrap without stratification
        # on the different strata
        masks = [strata == x for x in np.unique(strata)]
        boot_strata = [
            bootstrap(a=a[m], f=None, b=b, method=method, strata=None,
                      random_state=random_state) for m in masks
        ]
        # concatenate resampled strata along first column axis
        X = np.concatenate(boot_strata, axis=1)
    else:
        if method == "ordinary":
            # i.i.d. sampling from ecdf of a
            X = np.reshape(a[np.random.choice(range(a.shape[0]),
                                              a.shape[0] * b)],
                           newshape=(b, ) + a.shape)
        elif method == "balanced":
            # permute b concatenated copies of a
            r = np.reshape([a] * b,
                           newshape=(b * a.shape[0], ) + a.shape[1:])
            X = np.reshape(r[np.random.permutation(range(r.shape[0]))],
                           newshape=(b, ) + a.shape)
        elif method == "parametric":
            if len(a.shape) > 1:
                raise ValueError("a must be one-dimensional")

            # fit parameters by maximum likelihood and sample
            if family == "gaussian":
                theta = norm.fit(a)
                arr = norm.rvs(size=n * b, loc=theta[0], scale=theta[1],
                               random_state=random_state)
            elif family == "t":
                theta = t.fit(a, fscale=1)
                arr = t.rvs(size=n * b, df=theta[0], loc=theta[1],
                            scale=theta[2], random_state=random_state)
            elif family == "laplace":
                theta = laplace.fit(a)
                arr = laplace.rvs(size=n * b, loc=theta[0], scale=theta[1],
                                  random_state=random_state)
            elif family == "logistic":
                theta = logistic.fit(a)
                arr = logistic.rvs(size=n * b, loc=theta[0], scale=theta[1],
                                   random_state=random_state)
            elif family == "F":
                theta = F.fit(a, floc=0, fscale=1)
                arr = F.rvs(size=n * b, dfn=theta[0], dfd=theta[1],
                            loc=theta[2], scale=theta[3],
                            random_state=random_state)
            elif family == "gamma":
                theta = gamma.fit(a, floc=0)
                arr = gamma.rvs(size=n * b, a=theta[0], loc=theta[1],
                                scale=theta[2], random_state=random_state)
            elif family == "log-normal":
                theta = lognorm.fit(a, floc=0)
                arr = lognorm.rvs(size=n * b, s=theta[0], loc=theta[1],
                                  scale=theta[2], random_state=random_state)
            elif family == "inverse-gaussian":
                theta = invgauss.fit(a, floc=0)
                arr = invgauss.rvs(size=n * b, mu=theta[0], loc=theta[1],
                                   scale=theta[2], random_state=random_state)
            elif family == "pareto":
                theta = pareto.fit(a, floc=0)
                arr = pareto.rvs(size=n * b, b=theta[0], loc=theta[1],
                                 scale=theta[2], random_state=random_state)
            elif family == "beta":
                theta = beta.fit(a)
                arr = beta.rvs(size=n * b, a=theta[0], b=theta[1],
                               loc=theta[2], scale=theta[3],
                               random_state=random_state)
            elif family == "poisson":
                theta = np.mean(a)
                arr = poisson.rvs(size=n * b, mu=theta,
                                  random_state=random_state)
            else:
                raise ValueError("Invalid family")
            X = np.reshape(arr, newshape=(b, n))
        else:
            raise ValueError("method must be either 'ordinary',"
                             " 'balanced', or 'parametric',"
                             " '{method}' was supplied".format(method=method))

    # samples are already smooth in the parametric case
    if smooth and (method != "parametric"):
        X += np.random.normal(size=X.shape, scale=1 / np.sqrt(n))

    if f is None:
        return X
    else:
        return np.asarray([f(x) for x in X])
from scipy.stats import f, norm, expon, binom, chi2, t
import matplotlib.pyplot as plt

##
# discussion items
# 1. Show histogram of all distributions


def plot_sample_hist(sample, title):
    plt.figure()
    plt.title(title)
    plt.hist(sample)


sample = norm.rvs(size=1000)
plot_sample_hist(sample, 'normal distribution')

sample = expon.rvs(size=1000)
plot_sample_hist(sample, 'exponential distribution')

sample = binom.rvs(10, 0.5, size=1000)
plot_sample_hist(sample, 'binomial distribution')

sample = chi2.rvs(10, size=1000)
plot_sample_hist(sample, 'chi-square distribution')

sample = t.rvs(10, size=1000)
plot_sample_hist(sample, 't distribution')

sample = f.rvs(10, 20, size=1000)
plot_sample_hist(sample, 'f distribution')
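# A minimal sketch of why the F histogram looks as it does: an F(d1, d2)
# variate is the ratio of two independent chi-square variates, each divided
# by its degrees of freedom. The 10 and 20 below mirror the sample above.
import numpy as np

d1, d2 = 10, 20
ratio = (chi2.rvs(d1, size=100_000) / d1) / (chi2.rvs(d2, size=100_000) / d2)
direct = f.rvs(d1, d2, size=100_000)

# the two samples should have nearly identical means and variances
print(ratio.mean(), direct.mean())  # both close to d2 / (d2 - 2) ~ 1.11
print(ratio.var(), direct.var())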
# (n, wages, V_iter, cd, mdir, fdir, margin, and figratio are defined
# elsewhere in the original script; this excerpt is partial.)

# w_dist = uniform()
w_dist = f(dfn=500, dfd=10)  # Underlying distribution function
# Percentiles for which a probability will be calculated
w_pctiles = np.linspace(10**(-10), 1 - 10**(-10), num=n)
# Value of the pdf for each percentile
f_w = w_dist.pdf(w_dist.ppf(w_pctiles))
# Discretized pdf (i.e. this sums to one)
f_w = f_w / sum(f_w)

# Set up discount factor
beta = .97

# Set up value of unemployment
c = 12000

# Set up value function initial guess
V_0 = np.sort(f.rvs(dfn=2, dfd=80, size=n)) * wages

# Specify number of iterations
T = 100

# Get value functions and reservation wages over time
V, W = V_iter(V_0, wages, beta, T)

# Change to figures directory
cd(mdir + fdir)

# Set up value function plot
fig, ax = plt.subplots(figsize=(8.5 - margin, (8.5 - margin) * figratio))

# Plot the initial guess
ax.plot(wages,