def plot_skewed_data(data):
    """Plot a normalized histogram of ``data`` with a fitted skew-normal PDF.

    A skew-normal distribution is fitted to ``data`` with ``skewnorm.fit``.
    Its PDF is drawn over a 20-bin density histogram, together with dotted
    vertical markers at the 1st, 50th and 99th sample percentiles.  The 1% and
    99% quantiles of the *fitted* distribution are printed and the figure is
    displayed with ``plt.show()``.

    Parameters
    ----------
    data : sequence of float
        Samples to plot; must be non-empty (``min``/``max`` are taken).
    """
    # Evaluate the fitted PDF at the centers of a fine grid spanning the data.
    num_bins = 200
    edges = np.linspace(min(data), max(data), num_bins)
    bin_centers = 0.5 * (edges[1:] + edges[:-1])

    # get PDF from data by fitting it to skewnorm
    a, loc, scale = skewnorm.fit(data)
    pdf = skewnorm.pdf(bin_centers, a, loc, scale)

    # ``normed`` was removed from matplotlib's hist(); ``density`` replaces it.
    hist_kwargs = dict(density=True, edgecolor='black', linewidth=1.2,
                       alpha=0.5, bins=20, stacked=True)

    fig, ax = plt.subplots(1, 1)
    ax.plot(bin_centers, pdf, 'r-', lw=5, alpha=0.6, label='skewnorm pdf')
    ax.hist(data, color='g', label="Histogram of samples (normalized)",
            **hist_kwargs)

    # The 50th percentile is the sample median, so it is labelled as such.
    for percentile, label in ((1, '1 pc'), (50, 'median'), (99, '99 pc')):
        ax.axvline(x=np.percentile(data, percentile), color='red', ls=':',
                   lw=2, label=label)

    # Report quantiles of the fitted distribution, i.e. including the fitted
    # location and scale rather than the standardized shape-only quantiles.
    print("%f :: %f" % (skewnorm.ppf(0.01, a, loc, scale),
                        skewnorm.ppf(0.99, a, loc, scale)))
    ax.legend(loc='best', frameon=True)
    plt.show()
def fake_lnlike(p):
    """Return the log of the summed squared quantile mismatches for ``p``.

    ``p`` unpacks to the skew-normal location, scale and shape (in that
    order).  The 50%, 16% and 84% quantiles of that distribution are compared
    with ``median``, ``median - lower_err`` and ``median + upper_err``, which
    are read from the enclosing/global scope.
    """
    location, width, shape = p

    def quantile(prob):
        return skewnorm.ppf(prob, shape, loc=location, scale=width)

    miss_median = quantile(0.5) - median
    miss_lower = quantile(0.16) - (median - lower_err)
    miss_upper = quantile(0.84) - (median + upper_err)

    return np.log(miss_median**2 + miss_lower**2 + miss_upper**2)
def update(val):
    """Redraw both sample curves and the test statistics from the controls.

    ``val`` is not used; the current settings are read from the ``.val``
    attribute of the module-level ``skew``, ``mathematic_expectation``,
    ``standard_deviation`` and ``sample_n`` objects (presumably matplotlib
    sliders -- confirm against the caller).  A skew-normal sample and a
    reference normal sample (module-level ``mu``/``sigma``) are drawn by
    inverse-CDF sampling, rounded to one decimal and counted, and compared
    with a t-test and a Mann-Whitney U test.
    """
    sample_size = int(sample_n.val)
    shape = skew.val
    centre = mathematic_expectation.val
    spread = standard_deviation.val

    # Skew-normal sample via the inverse CDF of uniform draws.
    skewed = skewnorm.ppf(np.random.uniform(0.0, 1.0, sample_size),
                          shape, centre, spread)
    skewed_x, skewed_counts = np.unique(np.around(skewed, decimals=1),
                                        return_counts=True)

    # Reference normal sample, drawn the same way.
    reference = norm.ppf(np.random.uniform(0.0, 1.0, sample_size), mu, sigma)
    reference_x, reference_counts = np.unique(
        np.around(reference, decimals=1), return_counts=True)

    t, p1 = np.around(ttest_ind(skewed, reference), decimals=3)
    u, p2 = np.around(mannwhitneyu(skewed, reference), decimals=3)

    l.set_xdata(skewed_x)
    l.set_ydata(skewed_counts)
    l1.set_xdata(reference_x)
    l1.set_ydata(reference_counts)
    te.set_text(
        "T-test: t = {}, p = {}; Mann-Whitney: U = {}, p = {}".format(
            t, p1, u, p2))
    fig.canvas.draw_idle()
def __init__(self, N, init_range, random_ratio, noisetype=None, R=None, Q=None):
    """Create ``N`` uniformly scattered states with equal weights.

    ``init_range`` is a sequence of ``(low, high)`` bounds, one per state
    dimension; column ``i`` of ``self.X`` is drawn uniformly from bound ``i``.
    When ``noisetype == 'skewnorm'``, ``Q`` is read as ``(shape, loc, scale)``
    and the mode of that skew-normal is approximated on a 100-point grid
    between its 1% and 99% quantiles and stored in ``self.sensingNoiseMode``.
    ``R`` is accepted but not used here.
    """
    n_dims = len(init_range)
    self.X = np.zeros((N, n_dims))
    self.W = np.ones(N) / N
    self.N = N
    self.random_ratio = random_ratio
    self.init_range = init_range
    self.noisetype = noisetype

    if noisetype == 'skewnorm':
        self.Q = Q
        lower = skewnorm.ppf(0.01, Q[0], Q[1], Q[2])
        upper = skewnorm.ppf(0.99, Q[0], Q[1], Q[2])
        grid = np.linspace(lower, upper, 100)
        density = skewnorm.pdf(grid, Q[0], Q[1], Q[2])
        # Grid point with the highest density approximates the mode.
        self.sensingNoiseMode = grid[np.argmax(density)]

    for dim, bounds in enumerate(init_range):
        self.X[:, dim] = np.random.uniform(bounds[0], bounds[1], N)
def estimate_distance_cutoff(locs, data_norm, tissue_mat, cutoff=0.0001):
    '''
    Estimate distance cutoffs from randomly shuffled target matrices.

    Ten times, two shuffled copies of ``tissue_mat`` are compared row against
    row with ``compute_diff_vs_common_const10``, ``jaccard_dist`` and
    ``compute_hausdorff`` (the latter on the ``locs`` rows selected where a
    shuffled row equals 1).  A skew-normal is fitted to each pooled set of
    distances.  Returns the ``cutoff`` quantile of the first two fits and the
    ``1 - cutoff`` quantile of the Hausdorff fit, in that order.
    ``data_norm`` is not used.
    '''
    target_mat = np.asarray(tissue_mat)
    n_targets = target_mat.shape[0]

    def fitted_quantile(samples, prob):
        shape, loc, scale = stats.skewnorm.fit(samples)
        return skewnorm.ppf(prob, shape, loc, scale)

    hamming_runs = []
    jaccard_runs = []
    hausdorff_vals = []
    # NOTE(review): the Hausdorff loops are assumed to run once per shuffle;
    # the collapsed source does not show the original indentation.
    for _ in np.arange(10):
        shuffled_a = shuffle(target_mat.T).T
        shuffled_b = shuffle(target_mat.T).T

        hamming_runs.append(
            cdist(shuffled_a, shuffled_b,
                  compute_diff_vs_common_const10).flatten())
        jaccard_runs.append(
            cdist(shuffled_a, shuffled_b, jaccard_dist).flatten())

        for row_a in range(n_targets):
            locs_a = locs[shuffled_a[row_a] == 1]
            for row_b in range(n_targets):
                locs_b = locs[shuffled_b[row_b] == 1]
                hausdorff_vals.append(compute_hausdorff(locs_a, locs_b))

    hamming_pool = [value for run in hamming_runs for value in run]
    hamming_cutoff = fitted_quantile(hamming_pool, cutoff)

    jaccard_pool = [value for run in jaccard_runs for value in run]
    jaccord_cutoff = fitted_quantile(jaccard_pool, cutoff)

    hausdorff_cutoff = fitted_quantile(hausdorff_vals, 1 - cutoff)
    return hamming_cutoff, jaccord_cutoff, hausdorff_cutoff
def cal_tot_sf(SFR, SFEN):
    """Build a skew-normal shaped star formation history and total it.

    The skew-normal PDF (shape ``skewness``, location ``location``, unit
    scale; both read from module globals) is sampled at ``SFEN`` points
    between its 1% and 99% quantiles and rescaled so its peak equals
    ``10**SFR``.

    Parameters: ``SFR`` is the log10 of the maximum star formation rate,
    ``SFEN`` the number of star formation events.

    Returns ``(tot_sf, mult, y)``: the sum of the rescaled samples, the
    rescaling factor, and the unscaled PDF samples.
    """
    import numpy as np
    from scipy.stats import skewnorm
    global skewness, location

    first = skewnorm.ppf(0.01, skewness, location, 1)
    last = skewnorm.ppf(0.99, skewness, location, 1)
    x = np.linspace(first, last, SFEN)
    # The scale stays 1: SFEN and SFR set the effective scale through mult.
    y = skewnorm.pdf(x, skewness, location, 1)

    mult = 10**SFR / max(y)

    # Accumulate sequentially, one event at a time.
    tot_sf = 0
    for density in y:
        tot_sf += mult * density
    return tot_sf, mult, y
def simulate_PDF(median, lower_err, upper_err, size=1, plot=True):
    '''
    Simulates a draw of posterior samples from a value and asymmetric
    errorbars by assuming the underlying distribution is a skewed normal
    distribution.

    Developed to estimate PDFs from literature exoplanet parameters that did
    not report their MCMC chains.

    Inputs:
    -------
    median : float
        the median value that was reported
    lower_err : float
        the lower errorbar that was reported
    upper_err : float
        the upper errorbar that was reported
    size : int
        the number of samples to be drawn
    plot : bool
        if true, also draw a diagnostic figure and return it

    Returns:
    --------
    samples : array of float
        the samples drawn from the simulated skewed normal distribution
    fig : matplotlib figure
        only returned (as ``(samples, fig)``) when ``plot`` is true
    '''
    sigma, omega, alpha = calculate_skewed_normal_params(
        median, lower_err, upper_err)
    samples = skewnorm.rvs(alpha, loc=sigma, scale=omega, size=size)

    if not plot:
        return samples

    lower_err = np.abs(lower_err)
    upper_err = np.abs(upper_err)
    # Fixed number of grid points instead of a fixed 0.01 step: the curve
    # stays smooth for tiny error bars and bounded in size for large ones.
    x = np.linspace(median - 4 * lower_err, median + 4 * upper_err, 1000)

    fig = plt.figure()
    # Reported median and error bars (solid black).
    for level in (median - lower_err, median, median + upper_err):
        plt.axvline(level, color='k', lw=2)
    plt.plot(x, skewnorm.pdf(x, alpha, loc=sigma, scale=omega), 'r-', lw=2)

    # 16/50/84% quantiles of the fitted distribution (dashed red).
    fit_percentiles = skewnorm.ppf([0.16, 0.5, 0.84], alpha, loc=sigma,
                                   scale=omega)
    for level in fit_percentiles:
        plt.axvline(level, color='r', ls='--', lw=2)
    plt.hist(samples, density=True, color='red', alpha=0.5)
    return samples, fig
def calc_risk_skewnorm(self, confidence=0.95):
    """Estimate VaR and CVaR of the portfolio from a skew-normal loss fit.

    Portfolio returns are ``self.returns.dot(self.allocation)``; losses are
    the negated first column of that product.  A skew-normal is fitted to the
    losses, VaR is its ``confidence`` quantile, and CVaR is the fitted mean
    loss beyond VaR divided by ``1 - confidence``.

    Returns ``(losses, VaR, CVaR)``.
    """
    portfolio = self.returns.dot(self.allocation)
    losses = -portfolio.iloc[:, 0]

    fitted = skewnorm.fit(losses)
    shape, location, spread = fitted[0], fitted[1], fitted[2]
    VaR = skewnorm.ppf(confidence, *fitted)

    def loss_value(y):
        return y

    # Partial expectation of the loss over the tail above VaR.
    tail_loss = skewnorm.expect(loss_value, args=(shape, ), loc=location,
                                scale=spread, lb=VaR)
    CVaR = (1 / (1 - confidence)) * tail_loss
    return losses, VaR, CVaR
def spx_implied_var_single(rolling_window, var_pct, vix, skew, spx, option='P'):
    """Suggest an SPX strike from a VaR-style quantile of log returns.

    The shape parameter is ``-(skew - 100) / 10`` and the scale is
    ``sqrt(vix**2 / 365 * 1.5) / 100 * sqrt(rolling_window)``.  For
    ``option == 'C'`` the ``1 - var_pct`` quantile of a zero-mean normal is
    used; otherwise the ``var_pct`` quantile of a zero-location skew-normal.
    The quantile is treated as a log return and applied to ``spx``.

    Prints the return percentage and the floored strike, and returns the
    unrounded strike.
    """
    alpha = -(skew - 100) / 10
    daily_variance = ((vix * vix) / 365) * 1.5
    period_vix = (np.sqrt(daily_variance) / 100) * np.sqrt(rolling_window)

    is_call = option == 'C'
    if is_call:
        var_pct = 1 - var_pct
    pct_var = (norm.ppf(var_pct, 0, period_vix) if is_call
               else skn.ppf(var_pct, alpha, 0, period_vix))

    spx_k_suggestion = spx * np.exp(pct_var)

    return_text = str(round(pct_var * 100, 2))
    strike_text = str(np.floor(spx_k_suggestion))
    print('VaR return percent for SPX is: ' + return_text)
    print('Suggested SPX strike: ' + strike_text)
    return spx_k_suggestion
def spx_implied_var(rolling_window, var_pct, mkt_time='Close', option='P'):
    """Backtest the implied-VaR strike suggestion against the global ``df``.

    For every row of the module-level ``df`` a VaR log return is computed
    from a distribution scaled by that row's daily VIX times
    ``sqrt(rolling_window)``, turned into an SPX level, and compared with the
    SPX close ``rolling_window`` rows later.  Breaches are plotted and a
    summary is printed.

    Parameters
    ----------
    rolling_window : int
        Number of rows (trading days) between trade date and expiry.
    var_pct : float
        Tail probability.  For calls ``1 - var_pct`` is used.
    mkt_time : str
        ``'Open'`` uses the open columns on the trade date (open-to-close
        worst case); anything else uses close-to-close.
    option : str
        ``'C'`` uses a normal distribution; anything else a skew-normal
        whose shape is the ``skew`` column.

    Returns
    -------
    DataFrame with columns ``spx``, ``spx_shift``, ``var_pct``,
    ``var_spx_lvl``, ``actual_to_var_diff``, ``VIX Close``, ``vix_shift``.
    """
    # Work on an explicit copy: columns are added below, and assigning to a
    # bare slice of ``df`` triggers pandas' SettingWithCopy behaviour.
    if mkt_time == 'Open':
        # Use the market Open values so that the worst case runs from market
        # open on trade date to market close on expiry.
        temp_df = df[[
            'SPX Open', 'SPX Close', 'skew', 'Daily VIX Open',
            'Daily VIX Close', 'VIX Close'
        ]].copy()
        temp_df['spx_shift'] = temp_df['SPX Close'].shift(-rolling_window)
        temp_df['vix_shift'] = temp_df['VIX Close'].shift(-rolling_window)
        del temp_df['SPX Close'], temp_df['Daily VIX Close']
        temp_df.columns = [
            'spx', 'skew', 'vix', 'VIX Close', 'spx_shift', 'vix_shift'
        ]
    else:
        # Usual close-to-close calculation.
        temp_df = df[['SPX Close', 'skew', 'Daily VIX Close',
                      'VIX Close']].copy()
        temp_df.columns = ['spx', 'skew', 'vix', 'VIX Close']
        temp_df['spx_shift'] = temp_df['spx'].shift(-rolling_window)
        temp_df['vix_shift'] = temp_df['VIX Close'].shift(-rolling_window)

    # Scale the daily vix to the time-span given by rolling_window, e.g. for
    # a DTE of 5 days the daily vix is scaled by sqrt(5).
    temp_df['period_vix'] = temp_df['vix'] * np.sqrt(rolling_window)

    # Log returns are modelled with location 0.  Call VaR assumes a normal
    # distribution and Put VaR a skew-normal, each to be conservative.
    # NOTE(review): the raw 'skew' column is used as the shape parameter here;
    # confirm it has already been rescaled upstream.
    if option == 'C':
        var_pct = 1 - var_pct
        temp_df['var_pct'] = norm.ppf(var_pct, 0, temp_df['period_vix'])
    else:
        temp_df['var_pct'] = skn.ppf(var_pct, temp_df['skew'], 0,
                                     temp_df['period_vix'])

    # SPX level corresponding to the VaR log return (the strike suggestion).
    temp_df['var_spx_lvl'] = temp_df['spx'] * np.exp(temp_df['var_pct'])

    # Relative gap between the realized SPX level and the VaR level; only
    # meaningful after filtering on breaches.
    temp_df['actual_to_var_diff'] = (
        temp_df['spx_shift'] / temp_df['var_spx_lvl'] - 1)

    # Realized SPX return over rolling_window.
    temp_df['actual_spx_return'] = temp_df['spx_shift'] / temp_df['spx'] - 1

    if option == 'C':
        plot_df = temp_df[temp_df['var_spx_lvl'] < temp_df['spx_shift']]
    else:
        plot_df = temp_df[temp_df['var_spx_lvl'] > temp_df['spx_shift']]

    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(20, 10))
    plot_df[['var_pct', 'actual_spx_return']].plot(ax=axes[0, 0])
    plot_df['actual_spx_return'].plot(ax=axes[1, 0])
    plot_df['actual_to_var_diff'].hist(ax=axes[0, 1])
    plot_df['VIX Close'].hist(ax=axes[1, 1])
    axes[0, 0].set_title('Implied VaR Returns that Breached')
    axes[1, 0].set_title('Actual SPX Returns for Breach')
    axes[0, 1].set_title('Distribution of Breach Percentage')
    axes[1, 1].set_title('Distribution of VIX Close on Trade Day')

    # Guard against an empty denominator when no row is complete.
    complete_rows = len(temp_df.dropna())
    if complete_rows:
        historical_prob_of_breach = 100 * len(plot_df) / float(complete_rows)
    else:
        historical_prob_of_breach = float('nan')

    print(
        "--------------------------------------------------------------------")
    print("")
    print("The historical probability of breaching is " +
          str(round(historical_prob_of_breach, 2)) + "%")
    print("With the total occurences being " + str(len(plot_df)) + " times")

    # Worst breaches first: largest overshoot for calls, largest undershoot
    # for puts.
    if option == 'C':
        plot_df = plot_df.sort_values(by='actual_to_var_diff',
                                      ascending=False)
    else:
        plot_df = plot_df.sort_values(by='actual_to_var_diff')

    print("With the worst 5 cases as follows:")
    print(plot_df.head())
    print("")
    print(
        "--------------------------------------------------------------------")
    print("")
    print("The latest SPX level and suggested strike is:")
    print(temp_df[['spx', 'VIX Close', 'skew', 'var_spx_lvl']].tail(3))

    return temp_df[[
        'spx', 'spx_shift', 'var_pct', 'var_spx_lvl', 'actual_to_var_diff',
        'VIX Close', 'vix_shift'
    ]]
def bestNstep(self, targetAccRate):
    r"""!
    Return the nstep (float) at which the fitted skew-normal CDF reaches
    `targetAccRate`, i.e. the `targetAccRate` quantile of `self.bestFit`.
    """
    fitParameters = tuple(self.bestFit)
    nstep = skewnorm.ppf(targetAccRate, *fitParameters)
    return nstep
def skew_gauss_ppf(q, A, mu, std, a):
    """Return the ``q`` quantile of a skew-normal distribution.

    ``mu`` is the location, ``std`` the scale and ``a`` the shape.  ``A`` is
    accepted but does not affect the result.
    """
    distribution = skewnorm(a, loc=mu, scale=std)
    return distribution.ppf(q)
# Draw samples from a skew-normal distribution, locate its mode on a grid and
# plot the PDF and the sample histogram shifted so that the mode sits at 0.
a = -10
loc = 0
w = 0.5
median = skewnorm.median(a, loc, w)
s = skewnorm.rvs(a, loc, w, 1000)
# Mode from the helper m0; this value is overwritten by the grid-based
# estimate further down.
mode = loc + w * m0(a)
plt.ion()
ax = plt.subplot(111)
print(type(s))
print(s.shape)
# Grid between the 1% and 99% quantiles and the PDF on that grid.
x = np.linspace(skewnorm.ppf(0.01, a, loc, w), skewnorm.ppf(0.99, a, loc, w), 100)
y = skewnorm.pdf(x, a, loc, w)
# Grid point with the highest density approximates the mode.
mode = x[np.argmax(y)]
# Shift both the grid and the samples so the mode is at zero.
x = x - mode
s = s - mode
ax.plot(x, y, 'r-', lw=5, alpha=0.6, label='skewnorm pdf')
ax.axvline(x=0, color='y', label='mode')
ax.hist(s, density=True)
ax.set_xlabel('noise')
ax.set_ylabel('Number of samples/PDF')
ax.set_title('Skewnormal histogram, shape = %.1f, loc = %.1f, w = %.1f' % (a, loc, w))
text = 'Shift all noise by mode = %.3f' % mode
def transform(self, x):
    """Map skew-normal values ``x`` to standard-normal scores.

    ``x`` is pushed through the CDF of the skew-normal described by
    ``self._a``, ``self._loc`` and ``self._scale``; the resulting
    probabilities are mapped back through the quantile function of a
    skew-normal with zero shape, zero location and unit scale.
    """
    probability = skewnorm.cdf(x=x, a=self._a, loc=self._loc,
                               scale=self._scale)
    return skewnorm.ppf(q=probability, a=0, loc=0, scale=1)
def skew_examples():
    """Plot left-skewed, unskewed and right-skewed example densities.

    The skewed panels use a skew-normal with shape 4 (mirrored for the left
    panel); the middle panel uses a normal centred on the grid mean.  Mode,
    median and mean are marked with split orange lines and rotated labels at
    hand-tuned positions.  Returns the array of the three axes.
    """
    highlight = 'orange'

    def mark(axis, at, upper, label, label_x, label_y, lower_top):
        # A vertical line drawn in two segments, with the label between them.
        axis.axvline(at, upper[0], upper[1], color=highlight)
        axis.text(s=label, x=label_x, y=label_y, rotation=90)
        axis.axvline(at, 0, lower_top, color=highlight)

    fig, ax = plt.subplots(1, 3, figsize=(20, 4))

    a = 4
    mean_skew_val = skewnorm.mean(a)
    median_skew_val = skewnorm.median(a)

    # Grid covering the central 99.8% of the distribution.
    x = np.linspace(skewnorm.ppf(0.001, a), skewnorm.ppf(0.999, a), 100)

    # Left skew: the right-skewed curve mirrored about zero.
    left = ax[0]
    left.plot(x * -1, skewnorm.pdf(x, a))
    left.set_title('Left/Negative Skewed')
    mark(left, -0.42, (0.72, 0.925), 'mode', -0.49, 0.5, 0.53)
    mark(left, median_skew_val * -1, (0.52, 0.83), 'median', -0.74, 0.35, 0.3)
    mark(left, mean_skew_val * -1, (0.26, 0.77), 'mean', -0.84, 0.16, 0.09)

    # No skew: mean, median and mode share one marker.
    middle = ax[1]
    centre = x.mean()
    middle.plot(x, norm.pdf(x, loc=centre, scale=0.56))
    middle.set_title('No Skew')
    middle.text(s=' mean\nmedian\n mode', x=centre - 0.25, y=0.25)
    middle.axvline(centre, 0.5, 0.94, color=highlight)
    middle.axvline(centre, 0, 0.3, color=highlight)

    # Right skew.
    right = ax[2]
    right.plot(x, skewnorm.pdf(x, a))
    right.set_title('Right/Positive Skewed')
    mark(right, 0.42, (0.72, 0.925), 'mode', 0.35, 0.5, 0.53)
    mark(right, median_skew_val, (0.52, 0.83), 'median', 0.6, 0.35, 0.3)
    mark(right, mean_skew_val, (0.26, 0.77), 'mean', 0.72, 0.16, 0.09)

    # Common axis labels and y-limits.
    for panel in ax:
        panel.set_xlabel('x')
        panel.set_ylabel('f(x)')
        panel.set_ylim(0, 0.75)

    return ax
# Demo script: skew-normal PDFs for three shape values, then histograms of
# normal, Laplace and cosine samples to illustrate kurtosis.
import numpy as np  # used below for np.linspace; was missing
import scipy.stats  # used below as scipy.stats.<dist>; was missing
from scipy.stats import skewnorm
import matplotlib.pyplot as plt

# --- skewness: PDFs for shape -3, 0 and 3 between their 1% / 99% quantiles
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 2.5))
x1 = np.linspace(skewnorm.ppf(0.01, -3), skewnorm.ppf(0.99, -3), 100)
x2 = np.linspace(skewnorm.ppf(0.01, 0), skewnorm.ppf(0.99, 0), 100)
x3 = np.linspace(skewnorm.ppf(0.01, 3), skewnorm.ppf(0.99, 3), 100)
# Only the density values are passed, so the x axis is the sample index.
ax1.plot(skewnorm(-3).pdf(x1), 'k-', lw=4)
ax2.plot(skewnorm(0).pdf(x2), 'k-', lw=4)
ax3.plot(skewnorm(3).pdf(x3), 'k-', lw=4)

# --- kurtosis
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 2.5))
axs = [ax1, ax2, ax3]
Titles = ["Mesokurtic", "Lepkurtic", "Playkurtic"]

# Normal distribution
dist = scipy.stats.norm(loc=100, scale=5)
sample_norm = dist.rvs(size=10000)
# Laplace distribution (heavier tails)
dist2 = scipy.stats.laplace(loc=100, scale=5)
sample_laplace = dist2.rvs(size=10000)
# Cosine distribution (lighter tails)
dist3 = scipy.stats.cosine(loc=100, scale=5)
sample_cosine = dist3.rvs(size=10000)

samples = [sample_norm, sample_laplace, sample_cosine]
for n in range(0, len(axs)):
    # ``normed`` was removed from matplotlib's hist(); ``density`` replaces it.
    axs[n].hist(samples[n], bins='auto', density=True)
# Summary statistics of the symmetric curve's density values (sym_data and
# sym_density are defined earlier in the script).
sym_mode = stats.mode(sym_density)
sym_mean = np.mean(sym_density)
sym_median = np.median(sym_density)
plt.subplot(221)
# The string below is disabled code that would draw the statistics as lines.
""" plt.vlines(sym_mean, 0, 0.7, colors='k') plt.vlines(sym_median, 0, 0.7, colors='g') plt.vlines(sym_mode[0], 0, 0.7, colors='m') """
plt.plot(sym_data, sym_density, 'r-', label='skewnorm pdf')
#mean, var, skew, kurt = skewnorm.stats(a, moments='mvsk')
"""positively skewed dataset"""
a = 2
# NOTE(review): the grid starts at the 10% quantile (0.1), not 1% -- confirm
# this is intended.
pos_skewed = np.linspace(skewnorm.ppf(0.1, a), skewnorm.ppf(0.99, a), 100)
#pos_skewed = np.random.exponential(size=100)
#pos_density =expon.pdf(pos_skewed)
pos_density = skewnorm.pdf(pos_skewed, a)
# These are statistics of the density values themselves, not of the
# distribution.
pos_mode = stats.mode(pos_density)
pos_mean = np.mean(pos_density)
pos_median = np.median(pos_density)
plt.subplot(222)
# Disabled code again, kept as a string.
""" plt.vlines(pos_mean, 0, 0.7, colors='k') plt.vlines(pos_median, 0, 0.7, colors='g') plt.vlines(pos_mode[0], 0, 0.7, colors='m') """
plt.plot(pos_skewed, pos_density, 'r-', label='skewnorm pdf')
"""negatively skewed dataset"""
a = -4
# Basic tour of scipy.stats.skewnorm: moments, PDF, frozen distribution and a
# CDF/PPF round-trip check.
import numpy as np  # used below for np.linspace / np.allclose; was missing
from scipy.stats import skewnorm
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
a = 4
mean, var, skew, kurt = skewnorm.stats(a, moments='mvsk')

# Display the probability density function (``pdf``):
x = np.linspace(skewnorm.ppf(0.01, a), skewnorm.ppf(0.99, a), 100)
ax.plot(x, skewnorm.pdf(x, a), 'r-', lw=5, alpha=0.6, label='skewnorm pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = skewnorm(a)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = skewnorm.ppf([0.001, 0.5, 0.999], a)
np.allclose([0.001, 0.5, 0.999], skewnorm.cdf(vals, a))  # True
# https://stackoverflow.com/questions/66986076/matplotlib-time-on-x-axis-from-datetime-json from scipy.stats import skewnorm import numpy as np import pandas as pd import matplotlib.pyplot as plt from matplotlib.dates import DateFormatter fig, ax = plt.subplots(1, 1) a = 4 x = np.linspace(skewnorm.ppf(0.01, a), skewnorm.ppf(0.99, a), 50) d = pd.date_range("2021-04-06 12:00:00", "2021-04-06 16:00:00", 50) ax.plot(d, skewnorm.pdf(x, a), 'b-', label='skewnorm pdf') # from matplotlib.dates import AutoDateFormatter, AutoDateLocator # xtick_locator = AutoDateLocator(minticks=3, maxticks=15) # xtick_formatter = AutoDateFormatter(xtick_locator) # ax.xaxis.set_major_locator(xtick_locator) # ax.xaxis.set_major_formatter(xtick_formatter) ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d %H:%M:%S')) fig.autofmt_xdate() plt.show() # used this code sample as a reference: https://matplotlib.org/stable/gallery/text_labels_and_annotations/date.html # found out how to use scipy.skewnorm from here: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skewnorm.html. Stil not sure about all the values, though # see autofmt_xdate(): https://matplotlib.org/stable/api/figure_api.html?highlight=autofmt#matplotlib.figure.Figure.autofmt_xdate # pandas.date_range() is pretty useful here. Found out about this from https://jakevdp.github.io/PythonDataScienceHandbook/03.11-working-with-time-series.html
def inverse_transform(self, z, copy=None):
    """Map standard-normal scores ``z`` back to skew-normal values.

    ``z`` is pushed through the CDF of a skew-normal with zero shape, zero
    location and unit scale; the resulting probabilities are mapped through
    the quantile function of the skew-normal described by ``self._a``,
    ``self._loc`` and ``self._scale``.  ``copy`` is accepted but not used.
    """
    probability = skewnorm.cdf(x=z, a=0, loc=0, scale=1)
    return skewnorm.ppf(q=probability, a=self._a, loc=self._loc,
                        scale=self._scale)
def generic_dispersion(self, nd_dict, GH_dict=None):
    """Discretise every dispersed parameter and combine the bins.

    For each entry of ``self.simulation_options["dispersion_parameters"]``
    the matching distribution name and bin count are read from
    ``"dispersion_distributions"`` and ``"dispersion_bins"``:

    * ``"uniform"``: evenly spaced values between ``nd_dict[name + "_lower"]``
      and ``nd_dict[name + "_upper"]`` with equal weights.
    * ``"normal"``: Gauss-Hermite nodes/weights when ``GH_dict`` is a dict
      (keys ``"nodes"`` and ``"normal_weights"``), otherwise CDF-weighted
      bins; parameters ``_mean`` and ``_std``.
    * ``"lognormal"`` (``_shape``, ``_scale``), ``"skewed_normal"``
      (``_mean``, ``_std``, ``_skew``) and ``"log_uniform"`` (``_loglower``,
      ``_logupper``): CDF-weighted bins.

    Distribution names other than these contribute nothing.

    Returns
    -------
    sim_params : list
        The parameter names, with ``"E0"``/``"k0"`` renamed to
        ``"E_0"``/``"k_0"``.
    value_combinations : list of tuple
        Cartesian product of the per-parameter values.
    weight_combinations : list of tuple
        Cartesian product of the per-parameter weights, in the same order.
    """
    options = self.simulation_options
    tail = 1e-4  # probability mass cut off at each end of a distribution

    def cdf_binned(dist, shape_args, loc, scale, n_bins):
        """Return (midpoints, weights) for ``n_bins`` CDF-weighted bins."""
        lowest = dist.ppf(tail, *shape_args, loc=loc, scale=scale)
        highest = dist.ppf(1 - tail, *shape_args, loc=loc, scale=scale)
        edges = np.linspace(lowest, highest, n_bins)
        cdf_at_edges = dist.cdf(edges, *shape_args, loc=loc, scale=scale)

        # The first bin is the lower tail up to the first edge; every other
        # bin holds the mass between consecutive edges.
        weights = np.zeros(n_bins)
        weights[0] = cdf_at_edges[0]
        weights[1:] = np.diff(cdf_at_edges)

        # Representative values: the median of the lower tail, then the
        # midpoints between consecutive edges.
        midpoints = np.zeros(n_bins)
        midpoints[0] = dist.ppf(tail / 2, *shape_args, loc=loc, scale=scale)
        midpoints[1:] = (edges[:-1] + edges[1:]) / 2
        return midpoints, weights

    weight_arrays = []
    value_arrays = []
    for i, name in enumerate(options["dispersion_parameters"]):
        distribution = options["dispersion_distributions"][i]
        n_bins = options["dispersion_bins"][i]

        if distribution == "uniform":
            # The bounds are looked up in nd_dict like every other
            # distribution's parameters (the key strings themselves are not
            # valid linspace endpoints).
            values = np.linspace(nd_dict[name + "_lower"],
                                 nd_dict[name + "_upper"], n_bins)
            weights = [1 / n_bins] * n_bins
        elif distribution == "normal":
            param_mean = nd_dict[name + "_mean"]
            param_std = nd_dict[name + "_std"]
            if isinstance(GH_dict, dict):
                # Gauss-Hermite quadrature: rescale the nodes to the target
                # normal distribution.
                values = [(param_std * math.sqrt(2) * node) + param_mean
                          for node in GH_dict["nodes"]]
                weights = GH_dict["normal_weights"]
            else:
                values, weights = cdf_binned(norm, (), param_mean, param_std,
                                             n_bins)
        elif distribution == "lognormal":
            param_shape = nd_dict[name + "_shape"]
            param_scale = nd_dict[name + "_scale"]
            print("shape, scale", param_shape, param_scale)
            values, weights = cdf_binned(lognorm, (param_shape, ), 0,
                                         param_scale, n_bins)
        elif distribution == "skewed_normal":
            param_mean = nd_dict[name + "_mean"]
            param_std = nd_dict[name + "_std"]
            param_skew = nd_dict[name + "_skew"]
            values, weights = cdf_binned(skewnorm, (param_skew, ), param_mean,
                                         param_std, n_bins)
        elif distribution == "log_uniform":
            param_upper = nd_dict[name + "_logupper"]
            param_lower = nd_dict[name + "_loglower"]
            values, weights = cdf_binned(loguniform,
                                         (param_lower, param_upper), 0, 1,
                                         n_bins)
        else:
            # Unknown distribution names are skipped, as before.
            continue

        value_arrays.append(values)
        weight_arrays.append(weights)

    weight_combinations = list(itertools.product(*weight_arrays))
    value_combinations = list(itertools.product(*value_arrays))

    # Return a renamed copy so the stored option list is left untouched.
    renamed = {"E0": "E_0", "k0": "k_0"}
    sim_params = [renamed.get(param, param)
                  for param in options["dispersion_parameters"]]
    return sim_params, value_combinations, weight_combinations
def _fit_capped_skewnorm(samples):
    """Fit a skew-normal to ``samples`` with its shape capped at TH_SKEWNESS.

    A single sample cannot be fitted, so it yields the degenerate triple
    ``(TH_SKEWNESS, that sample's value, MIN_SCALE)``.
    """
    if samples.size == 1:
        return TH_SKEWNESS, samples.mean(), MIN_SCALE
    shape, loc, scale = skewnorm.fit(samples)
    if shape > TH_SKEWNESS:
        # Refit with the shape parameter held fixed at the cap.
        shape, loc, scale = skewnorm.fit(samples, f0=TH_SKEWNESS)
    return shape, loc, scale


def sep_two_skewed_normals(x, th_init):
    """Split ``x`` into two skew-normal clusters and return the threshold.

    Starting from ``th_init``, the samples below / at-or-above the threshold
    are each fitted with a skew-normal (shape capped at ``TH_SKEWNESS``).
    The threshold is then moved, by bisection over the sorted samples, to
    where the lower cluster's density stops exceeding the upper cluster's,
    and the fits are repeated until the size of the lower cluster stops
    changing or revisits an earlier value.

    Parameters
    ----------
    x : numpy array of float
        One-dimensional samples.
    th_init : float
        Initial threshold.

    Returns
    -------
    th : float
        Final threshold, never above the ``TH_SKEWNORM_PPF`` quantile of the
        lower cluster.
    params : tuple
        ``(m0, m1, s0, s1, a0, a1)``: locations, scales and shapes of the
        lower (0) and upper (1) cluster.
    """
    x0 = x[x < th_init]
    x1 = x[x >= th_init]
    if x0.size == 0:
        # Nothing below the threshold: report a placeholder lower cluster.
        return th_init, (x.min() - 1, x1.mean(), 0.01, x1.std(), 0, 0)

    # A single-sample upper cluster is located at its own value (previously
    # it was placed at the lower cluster's mean).
    a1, m1, s1 = _fit_capped_skewnorm(x1)
    a0, m0, s0 = _fit_capped_skewnorm(x0)

    num_x0_last = x0.size
    num_change = 1
    x_sorted = sorted(x)
    nums_x0 = [num_x0_last, ]
    while num_change:
        # E step: bisect between the middle of each cluster for the sample
        # where the lower cluster's pdf stops exceeding the upper one's.
        # NOTE(review): i1 can equal x.size when the upper cluster has at
        # most one sample; confirm x_sorted[i1] cannot go out of range.
        i0 = int(x0.size / 2)
        i1 = x.size - int(x1.size / 2)
        while i1 - i0 > 1:
            i = int((i0 + i1) / 2)
            p0 = (skewnorm.pdf(x_sorted[i], a0, m0, s0) -
                  skewnorm.pdf(x_sorted[i], a1, m1, s1))
            if p0 > 0:
                i0 = i
            else:
                i1 = i
        th = (x_sorted[i0] + x_sorted[i1]) / 2
        x0 = x[x < th]
        x1 = x[x >= th]

        # M step: refit both clusters.
        if x0.size == 0:
            break
        a1, m1, s1 = _fit_capped_skewnorm(x1)
        a0, m0, s0 = _fit_capped_skewnorm(x0)

        # Stop once the split is stable or starts cycling.
        num_change = x0.size - num_x0_last
        num_x0_last = x0.size
        if num_x0_last not in nums_x0:
            nums_x0.append(num_x0_last)
        else:
            break

    th = min(skewnorm.ppf(TH_SKEWNORM_PPF, a0, m0, s0), th)
    # extreme case that under very weak L1 constraint, negligible cluster is
    # fitted with large sigma
    if s1 > 0.1 and s0 / s1 > 10:
        th = min(skewnorm.ppf(1e-4, a1, m1, s1), th)
    return th, (m0, m1, s0, s1, a0, a1)
# Compare scipy's normal / skew-normal PDF, CDF and PPF with the custom
# implementations on a 3x2 grid of axes.  Titles are raw strings so the
# mathtext backslashes are not treated as (invalid) escape sequences, and the
# legend labels name the function that is actually plotted.

# Left column: normal distribution.
ax[0][0].plot(x, norm.pdf(x, loc=0, scale=1), 'bo', alpha=0.6, label='norm pdf (scipy)')
ax[0][0].plot(x, normal_pdf(x, mu=0, sigma=1), 'r.', alpha=0.6, label='norm pdf (custom)')
ax[0][0].legend(loc='best', frameon=False)
ax[0][0].set_title(r"Normal PDF Comparison ($\mu = 0$ $\sigma = 1$)")

ax[1][0].plot(x, norm.cdf(x), 'bo', lw=5, alpha=0.6, label='norm cdf (scipy)')
ax[1][0].plot(x, normal_cdf(x, mu=0, sigma=1), 'r.', lw=5, alpha=0.6, label='norm cdf (custom)')
ax[1][0].legend(loc='best', frameon=False)
ax[1][0].set_title(r"Normal CDF Comparison ($\mu = 0$ $\sigma = 1$)")

ax[2][0].plot(y, norm.ppf(y), 'bo', lw=5, alpha=0.6, label='norm ppf (scipy)')
ax[2][0].legend(loc='best', frameon=False)
ax[2][0].set_title(r"PPF Comparison ($\mu = 0$ $\sigma = 1$)")

# Right column: skew-normal distribution.  The PDF panel uses shape 4, the
# CDF and PPF panels use shape -4.
ax[0][1].plot(x, skewnorm.pdf(x, loc=0, scale=1, a=4), 'bo', lw=5, alpha=0.6, label='skewnorm pdf (scipy)')
ax[0][1].plot(x, skew_normal_pdf(x, epsilon=0, omega=1, alpha=4), 'r.', lw=5, alpha=0.6, label='skewnorm pdf (custom)')
ax[0][1].legend(loc='best', frameon=False)
ax[0][1].set_title(r"Skew-normal PDF Comparison ($\mu = 0$ $\sigma = 1$ $alpha = 4$)")

ax[1][1].plot(x, skewnorm.cdf(x, loc=0, scale=1, a=-4), 'bo', lw=5, alpha=0.6, label='skewnorm cdf (scipy)')
ax[1][1].plot(x, skew_normal_cdf(x, epsilon=0, omega=1, alpha=-4), 'r.', lw=5, alpha=0.6, label='skewnorm cdf (custom)')
ax[1][1].legend(loc='best', frameon=False)
ax[1][1].set_title(r"Skew-normal CDF Comparison ($\mu = 0$ $\sigma = 1$ $alpha = -4$)")

ax[2][1].plot(y, skewnorm.ppf(y, a=-4), 'bo', lw=5, alpha=0.6, label='skewnorm ppf (scipy)')
ax[2][1].legend(loc='best', frameon=False)
ax[2][1].set_title(r"PPF Comparison ($\mu = 0$ $\sigma = 1$)")

plt.show()
# 'conjunction':{'PoSTagging':0.0, 'is':0.1, 'used':0.0, 'to':0.5, 'annotate':0.0, 'complex':0.0, 'sentences':0.0, 'with':0.4,'PoS':0.0} # } #Part of Speech Tagging - Named Entity Recognition #observations=['PoSTagging', 'is', 'used', 'to', 'annotate', 'complex', 'sentences', 'with','PoS'] #observations=['Python','is','interpreted'] obs_file = open("NamedEntityRecognition_HMMViterbi_CRF.observations", "r") observations = obs_file.read().split() emission_probabilities = {} cnt = len(states) for s in states: emissionsdict = {} x = np.linspace(skewnorm.ppf(0.01, 5, cnt * 0.5, 1), skewnorm.ppf(0.99, 5, cnt * 0.5, 1), len(observations)) sknormpdf = skewnorm.pdf(x, 5, cnt * 0.5, 1) sknormpdf_weighted = [] for p, q in zip(sknormpdf, x): sknormpdf_weighted.append(p * q * 0.01) print "skewnorm pdf weighted:", sknormpdf_weighted ax.plot(x, skewnorm.pdf(x, 5, cnt * 0.5, 1), 'r-', lw=5, alpha=0.6, label='skewnorm pdf') obs_cnt = 0 for o in observations: #print "emissiondict[o] = ",float(obs_cnt+cnt)/float(len(states) + len(observations))
## Null Hypothesis - No Skewness # In[375]: # Test the data for skewness print("Skewtest result: ", skewtest(losses)) # In[376]: # Fit the portfolio loss data to the skew-normal distribution params = skewnorm.fit(losses) # In[377]: # Compute the 95% VaR from the fitted distribution, using parameter estimates VaR_95 = skewnorm.ppf(0.95, *params) print("VaR_95 from skew-normal: ", VaR_95) # Losses are not normally distributed as the critical value exceeeds the 99% conidence interval of test statistic value # Losses can be skewed # # Definition wiki - anderson # In many cases (but not all), you can determine a p value for the Anderson-Darling statistic and use that value to help you # determine if the test is significant are not. Remember the p ("probability") value is the probability of getting a result # #that is more extreme if the null hypothesis is true. If the p value is low (e.g., <=0.05), you conclude that the data do # not follow the normal distribution. Remember that you chose the significance level even though many people just use 0.05 # the vast majority of the time. We will look at two different data sets and apply the Anderson-Darling test to both sets. # # # Note that although the VaR estimate for the
# Build colour paths made of 100 random skew-normal shaped pulses, identical
# for every light, and display them.
import numpy as np
from scipy.stats import skewnorm
from master import *

##start##
a = 2
steps = 1000
color_max = 1 / 255
t_max = 300 * skewnorm.ppf(0.5, a, scale=color_max)
p_paths = CreatePixelPaths(steps)
# Time grid from the 1% quantile of the pulse shape up to t_max.
t = np.linspace(skewnorm.ppf(0.01, a, scale=color_max), t_max, steps)
count = 0
while (count < 100):
    # One pulse: random colour channel, random offset along the time grid.
    color_index = np.random.randint(0, 3)
    offset = np.random.uniform(low=0, high=t_max)
    pulse = skewnorm.pdf(t, a, loc=offset, scale=color_max)
    # Add the pulse to the first light exactly once ...
    for j in range(steps):
        p_paths[0][j][color_index] += pulse[j]
    # ... then mirror that channel onto every other light.  (Adding inside
    # the per-light loop applied the pulse once per light and left the lights
    # with different values.)
    for i in range(1, total_lights):
        for j in range(steps):
            p_paths[i][j][color_index] = p_paths[0][j][color_index]
    count += 1
##end of random_rbg_pulses part##
ShowPaths(p_paths)