def train_single_distribution(data):
    """Fit an independent Student t to each column of `data`."""
    data = np.asarray(data)
    dim = data.shape[1]
    params = []  # one (df, loc, scale) tuple per dimension
    for i in range(dim):
        params.append(t.fit(data[:, i]))  # scipy.stats t.fit
    return np.array(params)
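# --- Usage sketch (added for illustration; data and names below are
# hypothetical): fit per-column t-distributions to a synthetic 2-D sample.
import numpy as np
from scipy.stats import t

rng = np.random.default_rng(0)
sample = rng.standard_t(df=5, size=(1000, 2))
per_dim_params = train_single_distribution(sample)
print(per_dim_params.shape)  # (2, 3): one (df, loc, scale) row per column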
def calibrate(self, week_num):  # valid for 53 <= week_num <= 61
    window = np.asarray(self.log_return)[:, self.index[week_num - 53]:self.index[week_num]]
    mu = [np.mean(row) for row in window]
    self.mu = mu
    cov_matrix = np.cov(window)
    self.cov_matrix = cov_matrix

    ## Loss distribution under a multivariate normal fit
    X_normal = np.random.multivariate_normal(mu, cov_matrix, 1000)
    L = np.array([
        -sum(self.lambda_dict[self.end_date[week_num - 1]]
             * np.asarray(self.price_list)[:, self.index[week_num]]
             * (np.exp(np.asarray(X_normal[i])) - 1))
        for i in range(len(X_normal[:, 0]))
    ])
    weight = 1 / 15
    L_delta = np.array([
        sum(-weight * self.V_t[week_num] * np.asarray(X_normal[i]))
        for i in range(len(X_normal[:, 0]))
    ])
    # np.percentile expects a percentage in [0, 100], not a fraction;
    # VaR is computed here for reference but not returned
    VaR = np.percentile(L_delta, 95)

    ## Loss distribution under a Student t fit to realized weekly losses
    L_act = [-(self.V_t[self.index[i + 1]] - self.V_t[self.index[i]])
             for i in range(week_num - 53, week_num)]
    parameters = t.fit(L_act)
    L_t = t.rvs(parameters[0], loc=parameters[1], scale=parameters[2], size=1000)
    return [L, L_delta, L_t]
def compute_cvar(losses, ci, distribution='normal'):
    """
    CVaR: expected loss given that the loss is at least the (theoretical) VaR.

    :param losses: array-like of losses
    :param ci: confidence level, e.g. 0.95
    :param distribution: 'normal' or 'student'
    :return: conditional value at risk
    """
    allowed_distributions = ['normal', 'student']
    assert distribution in allowed_distributions, \
        f'distribution should be in: {allowed_distributions}'
    if distribution == 'normal':
        pm, ps = losses.mean(), losses.std()
        var = norm.ppf(ci, loc=pm, scale=ps)
        tail_loss = norm.expect(lambda x: x, loc=pm, scale=ps, lb=var)
    else:  # 'student'
        fitted = t.fit(losses)
        var = t.ppf(ci, *fitted)
        tail_loss = t.expect(lambda y: y, args=(fitted[0],),
                             loc=fitted[1], scale=fitted[2], lb=var)
    cvar = (1 / (1 - ci)) * tail_loss
    return cvar
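# --- Usage sketch (illustrative, simulated losses): 95% CVaR under each
# supported assumption. Assumes scipy.stats norm and t are imported as in
# the surrounding module.
import numpy as np

sim_losses = np.random.default_rng(1).normal(0.0, 0.02, 5000)
cvar_normal = compute_cvar(sim_losses, ci=0.95, distribution='normal')
cvar_student = compute_cvar(sim_losses, ci=0.95, distribution='student')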
def student_VAR_calculate(his, n, alpha):
    ## n-day returns of the stock
    ret = (his['closing_price'] - his['closing_price'].shift(n) * 1.0) \
        / his['closing_price'].shift(n)

    ## Normal distribution best fit: mu_norm = mean, sig_norm = standard deviation
    mu_norm, sig_norm = norm.fit(ret[n:].values)

    # Student t distribution best fit (nu = degrees of freedom)
    nu, mu_t, sig_t = t.fit(ret[n:].values)
    nu = np.round(nu)

    # Set parameters for calculating value at risk from the distribution
    h = 1
    lev = 100 * (1 - alpha)  # confidence level in percent
    xanu = t.ppf(alpha, nu)

    # sqrt((nu-2)/nu) rescales the t quantile to unit variance so it can be
    # combined with the normal-fit sigma
    VaR_t = np.sqrt((nu - 2) / nu) * t.ppf(1 - alpha, nu) * sig_norm - h * mu_norm
    # CVaR_t = -1/alpha * (1-nu)**(-1) * (nu-2+xanu**2) * t.pdf(xanu, nu)*sig_norm - h*mu_norm

    mean_return = mu_norm * n
    mean_standard_dev = sig_norm * np.sqrt(n)
    return mean_return, mean_standard_dev, VaR_t
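# --- Why the sqrt((nu - 2) / nu) factor above (added note): a Student t with
# nu degrees of freedom has variance nu / (nu - 2), so this rescaling yields a
# unit-variance t whose quantile can be paired with the normal-fit sigma.
import numpy as np
from scipy.stats import t

nu_demo = 6.0
print(np.isclose(t.var(nu_demo) * (nu_demo - 2) / nu_demo, 1.0))  # True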
def copulafit(u, family='gaussian'):
    '''
    Fit a copula to data.
    Returns the correlation matrix and, for the Student t family,
    the degrees of freedom.
    '''
    rhohat = None  # correlation matrix
    nuhat = None   # degrees of freedom (Student t only)
    if family == 'gaussian':
        u[u >= 1.0] = 0.999999
        inv_n = ndtri(u)
        rhohat = np.corrcoef(inv_n.T)
    elif family == 't':
        raise ValueError("Not implemented")
        # TODO: doesn't fit the data; doesn't work
        x = np.linspace(np.min(u), np.max(u), 100)
        inv_t = np.ndarray((len(x), u.shape[1]))
        for j in range(u.shape[1]):
            # t.fit returns (df, loc, scale) in that order
            param = t.fit(u[:, j])
            inv_t[:, j] = t.pdf(x, df=param[0], loc=param[1], scale=param[2])
        # TODO: correlation? nuhat?
        rhohat = np.corrcoef(inv_t.T)
        nuhat = None
    else:
        raise ValueError("Wrong family parameter. Use 'gaussian' or 't'")
    return rhohat, nuhat
def _fit(self, X):
    df, loc, scale = t.fit(X)
    self._params = {
        'df': df,
        'loc': loc,
        'scale': scale
    }
def CVaR_t(self, alpha, h):
    """h is the horizon in days."""
    self.nu, self.mu, self.sigma = t.fit(self.rets)
    mu_h = self.mu * (h / len(self.rets))
    sigma_h = self.sigma * ((h / len(self.rets)) ** 0.5)
    self.nu = np.round(self.nu)
    xanu = t.ppf(alpha, self.nu)
    CVAR = self.CVaR_t_stats(alpha, 1, self.nu, xanu, mu_h, sigma_h)
    # the index must be a list, not a set, for pandas to accept it
    return pd.DataFrame({"CVaR (t-statistics)": CVAR},
                        index=["Confidence {0:.1f}%".format(100 - alpha * 100)])
def duplicates():
    df = get_duplicate_data()
    df['error'] = df["POSIX_AGG_PERF_BY_SLOWEST_LOG10"] - df["prediction"]
    df = df[np.abs(df.error) < np.log10(1.5)]
    df.time_diff = np.log10(df.time_diff + 0.01)
    # df.error = np.abs(df.error)
    cuts = [-np.inf] + list(range(9))
    groups = [
        df[(df.time_diff >= low) & (df.time_diff < high)].error
        for low, high in zip(cuts[:-1], cuts[1:])
    ]

    # fit a Student t distribution to the first group and sample from it
    from scipy.stats import t
    param = t.fit(groups[0])
    t_gen_data = t.rvs(param[0], param[1], param[2], 10000)
    groups = list(reversed([t_gen_data] + groups))
    labels = list(
        reversed(["t-distribution fit", "0s to 1s"] + [
            "$10^{}s$ to $10^{}s$".format(low, high)
            for low, high in zip(cuts[1:-1], cuts[2:])
        ]))
    fig, axes = joypy.joyplot(groups,
                              colormap=matplotlib.cm.coolwarm_r,
                              overlap=0.3,
                              linewidth=1.,
                              ylim='own',
                              range_style='own',
                              tails=0.2,
                              bins=100,
                              labels=labels,
                              figsize=(2.5, 3))
    for idx, ax in enumerate(axes):
        try:
            ax.set_yticklabels([labels[idx]], fontsize=8, rotation=120)
        except Exception:
            pass
        ax.set_xlim([-0.2, 0.2])
        ax.set_xticks(np.log10([1 / 1.5, 1 / 1.2, 1, 1.2, 1.5]))
        ax.set_xticklabels([
            "$.67\\times$", "$.83\\times$", "$1\\times$", "$1.2\\times$",
            "$1.5\\times$"
        ], rotation=90, fontsize=8)
    plt.xlabel("Error", rotation=180)
    plt.ylabel("Time ranges")
    plt.savefig("figures/figure_5.pdf",
                dpi=600, bbox_inches='tight', pad_inches=0)
def __init__(self, mode=0, elem=None, sample=None):
    if mode == 0:
        self.df = elem[0]
        self.mu = elem[1]
        self.sigma = elem[2]
    else:
        self.df, self.mu, self.sigma = t.fit(sample)
    self.math_average = t.mean(self.df, loc=self.mu, scale=self.sigma)
    self.dispersion = t.var(self.df, loc=self.mu, scale=self.sigma)
def VaR_t(self, alpha, h, verbose):
    """h is the horizon in days."""
    self.nu, self.mu, self.sigma = t.fit(self.rets)
    # NOTE: mu_h is computed but the unscaled self.mu is passed below,
    # mirroring the original code
    mu_h = self.mu * (h / len(self.rets))
    sigma_h = self.sigma * ((h / len(self.rets)) ** 0.5)
    VAR = self.VaR_t_stats(alpha, h, self.nu, self.mu, sigma_h)
    if verbose:
        # the index must be a list, not a set, for pandas to accept it
        return pd.DataFrame({"VaR (t-statistics)": VAR},
                            index=["Confidence {0:.1f}%".format(100 - alpha * 100)])
    return VAR
def show_generation_result():
    # Define a 2-D model for cycle length and cycle length variability
    random.seed(19101985)
    np.random.seed(28111987)
    plt.style.use('seaborn-talk')
    fig_x, fig_y = 6, 6
    x, y = np.mgrid[20:40:0.1, 0:7:0.05]
    par_space_points = np.dstack((x, y))
    toy_generator = Generator(gen='double_gamma')
    x_range, y_range = (25, 40), (1, 5)
    z = toy_generator.get_pdf(par_space_points)

    # Generate a few points in this parameter space
    data = toy_generator.generate_people(10)

    # Plotting
    with PdfPages('plots/multivariate_test.pdf') as pages:
        # 2-D model
        fig = plt.figure(1, figsize=(fig_x, fig_y))
        ax = fig.add_subplot(111)
        ax.contour(x, y, z, 25, linewidths=0.7, cmap=plt.cm.Greys,
                   linestyles='dashed', zorder=0)
        for x, y, name, color in zip(data['true_cl'], data['true_clv'],
                                     data['name'], data['color']):
            plotting.add_point(ax=ax, pos=(x, y), x_range=x_range,
                               y_range=y_range, name=name, color=color)
        plotting.set_axis_info(ax, x_range=x_range, y_range=y_range,
                               title_x='Cycle Length',
                               title_y='Cycle Length Variability')
        pages.savefig(fig, bbox_inches='tight', pad_inches=0.1)
        plt.close()

        # Individual fits per generated person
        for index, row in data.iterrows():
            fig = plt.figure(1, figsize=(fig_x, fig_y))
            ax = fig.add_subplot(111)
            vals, color = row['cls'], row['color']
            kwargs = dict(color=color, linewidth=1.5)
            counts, bins, patches = ax.hist(vals, density=True,
                                            histtype='stepfilled', bins=40,
                                            alpha=0.3, **kwargs)
            bin_cs = np.array(math_ops.get_bin_centers(bins))
            authors = [patches[0]]
            labels = ['Generated values ({})'.format(len(vals))]
            try:
                # t.fit returns (df, loc, scale), mapped below to (nu', mu', sigma')
                bf_pars = t.fit(vals)
                best_fit, = ax.plot(bin_cs, t.pdf(bin_cs, *bf_pars),
                                    linestyle='--', **kwargs)
                nothing = plotting.get_dummy_element()
                authors += [nothing, best_fit, nothing]
                labels += ['', 'Best fit',
                           '$\\mu\' = {1:.2f}, \\sigma\' = {2:.2f}, \\nu\' = {0:.1f}$'.format(*bf_pars)]
                true_pars = (row['true_cl'], row['true_clv'])
                true_vals, = ax.plot(bin_cs, norm.pdf(bin_cs, *true_pars),
                                     linestyle='-', **kwargs)
                authors += [nothing, true_vals, nothing]
                labels += ['', 'True distribution',
                           '$\\mu = {0:.2f}, \\sigma = {1:.2f}$'.format(*true_pars)]
            except RuntimeError:
                print('Fit failed ---> ', len(vals))
            plotting.set_axis_info(ax, title_x='Cycle Length',
                                   title_y='Arbitrary Units')
            ax.legend(authors, labels, loc='upper left', fontsize=10,
                      framealpha=0.1)
            pages.savefig(fig, bbox_inches='tight', pad_inches=0.1)
            plt.close()
def historical_VaR(returns, rolling_size, alpha=0.05):
    """
    Rolling realized Value at Risk over a `rolling_size`-day window,
    assuming t-distributed returns.
    """
    realized_volatility = returns.rolling(rolling_size).std()
    realized_mean = returns.rolling(rolling_size).mean()
    # VaR based on realized data with a t-distribution assumption
    df = t.fit(returns)[0]
    z_t = t.ppf([alpha], df=df)
    return realized_mean[:-1] + realized_volatility[:-1] * z_t
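# --- Usage sketch (hypothetical return series): rolling 20-day VaR at 5%.
import numpy as np
import pandas as pd

demo_returns = pd.Series(
    0.01 * np.random.default_rng(2).standard_t(df=4, size=500))
var_series = historical_VaR(demo_returns, rolling_size=20, alpha=0.05)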
def fit(self, trajectory=None):
    """
    Fits parameters to the currently stored or provided data.

    Args:
        trajectory: array_like data to fit, or None

    Returns:
        fitted parameters: df, loc, scale
    """
    if trajectory is None:
        if self.df is None:
            self.df, self.loc, self.scale = student_t.fit(
                self.trajectory[-self.mask_idx:])
            self.df = np.clip(self.df, 3, None)
        return self.df, self.loc, self.scale
    df, loc, scale = student_t.fit(trajectory)
    df = np.clip(df, 3, None)
    return df, loc, scale
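# --- Why df is clipped at 3 (added note, an assumption about intent): for
# df <= 2 the t-distribution has infinite variance, so clipping keeps
# downstream moment-based code finite.
import numpy as np
from scipy.stats import t as student_t

heavy = np.random.default_rng(3).standard_t(df=2.2, size=400)
df_raw, loc_, scale_ = student_t.fit(heavy)
df_safe = np.clip(df_raw, 3, None)  # std = scale * sqrt(df/(df-2)) stays finite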
def naiveTopt(icm, cutoff=.05):  # like topt but doesn't correct for tail direction
    """
    Returns a cluster by fitting a t-distribution and returning
    residues beyond the cutoff.
    """
    # loc=np.median(icm) is only the optimizer's starting guess
    param = t.fit(icm, loc=np.median(icm))
    x = np.linspace(-1, 1, 200)
    cdf = t.cdf(x, param[0], loc=param[1], scale=param[2])
    minx = np.max(x[np.nonzero(cdf < cutoff)])
    # unlike topt, always takes the lower tail
    cursect = np.array([i for i in range(icm.size) if icm[i] < minx])
    return cursect
def calc_risk_t(self, confidence=0.95):
    port_returns = self.returns.dot(self.allocation)
    losses = -port_returns.iloc[:, 0]
    params = t.fit(losses)
    VaR = t.ppf(confidence, *params)
    tail_loss = t.expect(lambda y: y, args=(params[0],),
                         loc=params[1], scale=params[2], lb=VaR)
    CVaR = (1 / (1 - confidence)) * tail_loss
    return losses, VaR, CVaR
def initialize(y: np.ndarray):
    """
    Calculates the starting values for each distributional parameter.

    y: np.ndarray
        Data from which starting values are calculated.
    """
    nu_fit, loc_fit, scale_fit = student_t.fit(y)
    location_init = StudentT.param_dict_inv()["location_inv"](loc_fit)
    scale_init = StudentT.param_dict_inv()["scale_inv"](scale_fit)
    nu_init = StudentT.param_dict_inv()["nu_inv"](nu_fit)
    start_values = np.array([location_init, scale_init, nu_init])
    return start_values
def calc_CVar(Df, alpha=0.05, dist='n'):
    rtn = Df / Df.shift(1) - 1
    if dist == 'n':
        mu, sig = norm.fit(rtn.dropna().values)
        mu = mu * 252           # annualize the mean
        sig = sig * 252 ** 0.5  # annualize the volatility
        CVar = alpha ** (-1.) * norm.pdf(norm.ppf(alpha)) * sig - mu
    elif dist == 't':
        nu, mu, sig = t.fit(rtn.dropna().values)
        mu = mu * 252
        sig = sig * 252 ** 0.5
        xanu = t.ppf(alpha, nu)
        CVar = -1. / alpha * (1 - nu) ** (-1.) * (nu - 2 + xanu ** 2.) \
            * t.pdf(xanu, nu) * sig - mu
    return CVar
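# --- Usage sketch (hypothetical price series): annualized 95% CVaR under
# both distributional assumptions supported by calc_CVar.
import numpy as np
import pandas as pd

prices = pd.Series(100 * np.exp(np.cumsum(
    np.random.default_rng(4).normal(0, 0.01, 750))))
cvar_normal = calc_CVar(prices, alpha=0.05, dist='n')
cvar_student = calc_CVar(prices, alpha=0.05, dist='t')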
def wavelet_values(ax):
    model_name = 'vgg16'
    new_size = 50
    wavelet_data_file = ('Results/' + model_name + '_wavelet_ssm_'
                         + str(new_size) + '.h5')
    results = h5py.File(wavelet_data_file, 'r')
    model_layers = results.keys()
    model_layers = [l for l in model_layers if l.split('_')[0].isdigit()]
    model_layers.sort(key=lambda x: int(x.split('_')[0]))
    # r_vals = []
    # for l in layers:
    #     r_vals.append(data[l+'/ssm/r'].value)
    BONF_CRE = len(model_layers)  # Bonferroni correction factor
    POOL_INDICES = np.array([0, 3, 6, 10, 14, 18])
    ssm = np.array([results[l]['ssm/r'].value for l in model_layers])
    shuffles = [results[l]['ssm/rshuffles'] for l in model_layers]
    # fit a t-distribution to each layer's shuffle distribution
    t_shuffles = [t.fit(s) for s in shuffles]
    # norm_shuffles = [norm.fit(s) for s in shuffles]
    p_vals = np.array([1.0 - t.cdf(s, *tparams)
                       for s, tparams in zip(ssm, t_shuffles)])
    # norm_p_vals = [1.0 - norm.cdf(s, *tparams) for s, tparams in zip(ssm, norm_shuffles)]
    shuffle_mean_ssm = [np.mean(results[l]['ssm/rshuffles'].value)
                        for l in model_layers]
    shuffle_std_ssm = [np.std(results[l]['ssm/rshuffles'].value)
                       for l in model_layers]
    shuffle_min_ssm = np.array(shuffle_mean_ssm) - np.array(shuffle_std_ssm)
    shuffle_max_ssm = np.array(shuffle_mean_ssm) + np.array(shuffle_std_ssm)
    ssm = ssm[POOL_INDICES]
    p_vals = p_vals[POOL_INDICES]
    shuffle_min_ssm = shuffle_min_ssm[POOL_INDICES]
    shuffle_max_ssm = shuffle_max_ssm[POOL_INDICES]
    sig = np.array([p < ALPHA / BONF_CRE for p in p_vals])
    sig_index = np.where(sig)[0]
    ax.plot(sig_index, ssm[sig_index], 'o', color='k', markersize=5, alpha=0.5)
    ax.plot(ssm, '--', color='k', linewidth=3, alpha=0.5)
def topt(icm, cutoff=.05):
    """
    Returns a cluster by fitting a t-distribution and returning
    residues beyond the cutoff.
    """
    param = t.fit(icm, loc=np.median(icm))
    x = np.linspace(-1, 1, 200)
    cdf = t.cdf(x, param[0], loc=param[1], scale=param[2])
    minx = np.max(x[np.nonzero(cdf < cutoff)])
    # deal with the direction of the tail
    if icm[np.nonzero(np.abs(icm) == np.max(np.abs(icm)))] < 0:
        cursect = np.array([i for i in range(icm.size) if icm[i] < minx])
    else:
        maxx = np.min(x[np.nonzero(cdf > (1 - cutoff))])
        cursect = np.array([i for i in range(icm.size) if icm[i] > maxx])
    return cursect
def VaR(r_0, data, time, percentile=0.99, upward=True):
    # Model calibration
    k, mu, sigma = model_calibration(data)
    # VaR calculation from the mean-reverting model's conditional moments;
    # note the variance term is sigma**2 / (2 * k), not (sigma**2 / 2) * k
    expectation = r_0 * np.exp(-k * time) + mu * (1 - np.exp(-k * time))
    variance = (sigma ** 2 / (2 * k)) * (1 - np.exp(-2 * k * time))
    # Estimate the degrees of freedom for the t-distribution
    t_df = tdistr.fit(data)[0]
    if upward:
        return np.ravel(expectation + np.sqrt(variance) * tdistr.ppf(percentile, t_df))
    return np.ravel(expectation - np.sqrt(variance) * tdistr.ppf(percentile, t_df))
def fit(data: FloatIterable, nu: float = None, mu: float = 0,
        sigma: float = 1) -> 'StudentsT':
    """
    Fit a Student's t distribution to the data.

    :param data: Iterable of data to fit to.
    :param nu: Optional fixed value for degrees of freedom.
               Pass None to fit it freely.
    :param mu: Optional fixed value for the mean. Default is 0;
               pass None to fit it freely.
    :param sigma: Optional fixed value for the scale. Default is 1;
                  pass None to fit it freely.
    """
    kwargs = {}
    # scipy fixes a parameter when given fdf / floc / fscale
    for arg, kw in zip((nu, mu, sigma), ('fdf', 'floc', 'fscale')):
        if arg is not None:
            kwargs[kw] = arg
    nu, mu, sigma = t.fit(data, **kwargs)
    return StudentsT(nu=nu, mu=mu, sigma=sigma)
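# --- Usage sketch (added): mu and sigma default to *fixed* values, so all
# three parameters are fitted freely only when None is passed for each.
import numpy as np

demo_data = np.random.default_rng(5).standard_t(df=4, size=1000)
fit_all_free = fit(demo_data, nu=None, mu=None, sigma=None)
fit_fixed_df = fit(demo_data, nu=5.0, mu=None, sigma=None)  # pins df at 5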
def get_dists(lr, lb):
    kde_dict = dict()
    t_dict = dict()
    norm_dict = dict()
    xr = np.linspace(lr.min(), lr.max(), 1000)
    for i in range(3):
        temp = lr[lb == i]
        kde = gaussian_kde(temp)
        kde_dict[str(i)] = kde(xr)
        t_dict[str(i)] = t.pdf(xr, *t.fit(temp))
        norm_dict[str(i)] = norm.pdf(xr, *norm.fit(temp))
    return xr, kde_dict, t_dict, norm_dict
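# --- Usage sketch (synthetic labelled returns, added for illustration):
import numpy as np

rng = np.random.default_rng(10)
lr_demo = 0.01 * rng.standard_t(df=5, size=900)
lb_demo = np.repeat([0, 1, 2], 300)
xr, kde_d, t_d, n_d = get_dists(lr_demo, lb_demo)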
def is_t_distributed(X, K=500):
    # fit t parameters (the loc/scale arguments are only starting guesses)
    nu, mu, sigma = t.fit(X, loc=X.mean(), scale=X.std())
    # Kolmogorov-Smirnov statistic of the original sample; loc and scale
    # must be passed along with the shape parameter
    stat0, _ = kstest(X.to_numpy(), t.cdf, args=(nu, mu, sigma))
    # null distribution of the KS statistic
    d = []
    for k in range(K):
        # generate a sample from the fitted distribution
        tsample = t.rvs(nu, loc=mu, scale=sigma, size=X.shape[0])
        stat, _ = kstest(tsample, t.cdf, args=(nu, mu, sigma))
        d.append(stat)
    d = np.array(d)
    # compute the p-value
    pvalue = (np.sum(d > stat0) + 1) / (d.shape[0] + 1)
    return Distr(pvalue)
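# --- Usage sketch (added; assumes the Distr return type defined elsewhere
# in this module, and a pandas Series input since the code calls X.to_numpy()):
import numpy as np
import pandas as pd

X_demo = pd.Series(np.random.default_rng(6).standard_t(df=5, size=300))
result = is_t_distributed(X_demo, K=200)  # large p-value: can't reject the t fit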
def error_model(data):
    # fit a t distribution
    df, mu, std = t.fit(data, floc=0)
    # fit a normal distribution
    mu1, std1 = norm.fit(data, floc=0)
    print('t distribution parameters are', df, mu, std)
    print('Gaussian parameters are', mu1, std1)

    plt.figure()
    # Plot the histogram.
    plt.hist(data, bins=25, density=True, alpha=0.5)
    # Plot the PDFs.
    xmin, xmax = plt.xlim()
    x = np.linspace(xmin, xmax, 100)
    p1 = norm.pdf(x, loc=mu1, scale=std1)
    p = t.pdf(x, df, loc=mu, scale=std)
    kde_data = data[:, np.newaxis]
    X_plot = np.linspace(xmin, xmax, 1000)[:, np.newaxis]
    # Gaussian KDE
    kde = KernelDensity(kernel='gaussian', bandwidth=0.035).fit(kde_data)
    log_dens = kde.score_samples(X_plot)
    # plt.plot(X_plot[:, 0], np.exp(log_dens), linewidth=2)
    plt.plot(x, p, 'k', linewidth=2, label='t-distribution')
    plt.plot(x, p1, 'r', linewidth=2, label='Gaussian distribution')
    title = "Fit results: mu = %.2f, std = %.2f" % (mu, std)
    # plt.title(title)
    plt.xlabel('Normalized Error', fontsize=16)
    plt.ylabel('Probability of Error', fontsize=16)
    # plt.ylim(0, 6)
    # plt.title('Sensor 1', fontsize=17)
    plt.legend()
    plt.show()
def compute_theoretical_var(losses, ci, distribution='normal'):
    """
    Compute the theoretical VaR (Value at Risk) at a given confidence level.

    :param losses: array-like of losses
    :param ci: confidence level, e.g. 0.99
    :param distribution: 'normal', 'student' or 'gaussian_kde'
    :return: value at risk
    """
    allowed_distributions = ['normal', 'student', 'gaussian_kde']
    assert distribution in allowed_distributions, \
        f'distribution should be in: {allowed_distributions}'
    if distribution == 'normal':
        pm, ps = losses.mean(), losses.std()
        res = norm.ppf(ci, loc=pm, scale=ps)
    elif distribution == 'student':
        fitted = t.fit(losses)
        res = t.ppf(ci, *fitted)
    else:  # 'gaussian_kde'
        fitted = gaussian_kde(losses)
        sample = fitted.resample(100000)
        res = np.quantile(sample, ci)
    return res
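# --- Usage sketch (illustrative, simulated losses): 99% VaR under the three
# supported distributions.
import numpy as np

sim_losses = 0.02 * np.random.default_rng(7).standard_t(df=4, size=2000)
var_normal = compute_theoretical_var(sim_losses, ci=0.99, distribution='normal')
var_student = compute_theoretical_var(sim_losses, ci=0.99, distribution='student')
var_kde = compute_theoretical_var(sim_losses, ci=0.99, distribution='gaussian_kde')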
# QQ plot using the normal distribution
nJDmean, nJDsigma = norm.fit(past_log_stk["JD"])
stats.probplot(past_log_stk["JD"], dist="norm", plot=pylab)
plt.title("JD's QQ plot using Normal distribution")
plt.show()

nBAmean, nBAsigma = norm.fit(past_log_stk["BABA"])
stats.probplot(past_log_stk["BABA"], dist="norm", plot=pylab)
plt.title("BABA's QQ plot using Normal distribution")
plt.show()

nIVVmean, nIVVsigma = norm.fit(past_log_IVV)
stats.probplot(past_log_IVV, dist='norm', plot=pylab)
plt.title("IVV's QQ plot using Normal distribution")
plt.show()

# QQ plot using the t distribution (t.fit returns df, loc, scale;
# the df is passed to probplot as its shape parameter)
JDdf, tJDmean, tJDsigma = t.fit(past_log_stk["JD"])
stats.probplot(past_log_stk["JD"], JDdf, dist="t", plot=pylab)
plt.title("JD's QQ plot using T-distribution")
plt.show()

BAdf, tBAmean, tBAsigma = t.fit(past_log_stk["BABA"])
stats.probplot(past_log_stk["BABA"], BAdf, dist="t", plot=pylab)
plt.title("BABA's QQ plot using T-distribution")
plt.show()

IVVdf, tIVVmean, tIVVsigma = t.fit(past_log_IVV)
stats.probplot(past_log_IVV, IVVdf, dist="t", plot=pylab)
plt.title("IVV's QQ plot using T-distribution")
plt.show()

# Boxplot
fig2, ax2 = plt.subplots(nrows=1, ncols=3, figsize=(13, 5))
past_log_stk["JD"].plot.box(fontsize=7,
### Sample the nwl * (nwl-1)/2 normal distributions
Zarr = np.zeros(ncomb)
for m in range(ncomb):
    sigrand = sigarr[m]
    # sigrand = 10
    # sigrand = sigarr_expon[m]
    # sigrand = sigarr_div[m]
    # sigrand = np.random.choice(sigarr_expon)
    Xrand = norm.rvs(loc=0, scale=sigrand, size=1)
    Zarr[m] = Xrand

### Fit Cauchy, normal and Student t distributions
loc, sca = cauchy.fit(Zarr)
locnorm, scanorm = norm.fit(Zarr)
dft, loct, scat = t.fit(Zarr)

### Compound distribution
# sigarr[:] = sigrand
# weights = 1/sigarr_expon
# weights = weights / np.sum(weights)
weights = np.ones_like(sigarr)
pdf_cmb = lambda x: np.sum(weights * 1/sigarr * 1/np.sqrt(2*np.pi)
                           * np.exp(-1/2 * x**2 / sigarr**2))
# pdf_cmb = lambda x: np.sum(weights * 1/sigarr_expon * 1/np.sqrt(2*np.pi) * np.exp(-1/2*x**2/sigarr_expon**2))
# pdf_cmb = lambda x: np.sum(weights * 1/sigarr_div * 1/np.sqrt(2*np.pi) * np.exp(-1/2*x**2/sigarr_div**2))

### Buhlmann
# v2 = np.var(sigarr)
from scipy.stats import t, laplace, norm

a = np.random.randn(30)
outliers = np.array([8, 8.75, 9.5])
pl.hist(a, 7, weights=[1 / 30] * 30, rwidth=0.8)

# fit without outliers
x = np.linspace(-5, 10, 500)
loc, scale = norm.fit(a)
n = norm.pdf(x, loc=loc, scale=scale)
loc, scale = laplace.fit(a)
l = laplace.pdf(x, loc=loc, scale=scale)
fd, loc, scale = t.fit(a)
s = t.pdf(x, fd, loc=loc, scale=scale)
pl.plot(x, n, 'k>', x, s, 'r-', x, l, 'b--')
pl.legend(('Gauss', 'Student', 'Laplace'))
pl.savefig('robustDemo_without_outliers.png')

# add the outliers
pl.figure()
pl.hist(a, 7, weights=[1 / 33] * 30, rwidth=0.8)
pl.hist(outliers, 3, weights=[1 / 33] * 3, rwidth=0.8)
aa = np.hstack((a, outliers))
loc, scale = norm.fit(aa)
n = norm.pdf(x, loc=loc, scale=scale)
WMT_rets = WMT_rets.sort_values(axis=0, ascending=True)
VaR_WMT = -WMT_rets.quantile(0.05) * np.sqrt(250)
VaR = [VaR_GE, VaR_GOOG, VaR_PG, VaR_WMT]

#%% Parametric VaR
VaR_GE_p = (-GE.mean() + 1.65 * GE.std()) * np.sqrt(250)
VaR_GOOG_p = (-GOOG.mean() + 1.65 * GOOG.std()) * np.sqrt(250)
VaR_PG_p = (-PG.mean() + 1.65 * PG.std()) * np.sqrt(250)
VaR_WMT_p = (-WMT.mean() + 1.65 * WMT.std()) * np.sqrt(250)

#%% t distribution
from scipy.stats import skew, kurtosis, kurtosistest
from scipy.stats import norm, t

dx = 0.0001  # resolution
# NOTE: dx is not the spacing of x1, so the integral check below is only
# meaningful if x1 is built with step dx (e.g. np.arange(min, max, dx))
x1 = np.linspace(GE.min(), GE.max(), len(GE))
nu1, mu_t1, sig_t1 = t.fit(GE_rets)
pdf1 = t.pdf(x1, nu1, mu_t1, sig_t1)
print("Integral t.pdf(x1; mu1, sig1) dx = %.2f" % (np.sum(pdf1 * dx)))
print("nu1 = %.2f" % nu1)
print()

# Compute VaR
alpha = 0.05
lev = 100 * (1 - alpha)
mu_norm1, sig_norm1 = norm.fit(GE_rets)
h = 1  # days
StudenthVaR1 = (h * (nu1 - 2) / nu1) ** 0.5 * t.ppf(1 - alpha, nu1) \
    * sig_norm1 - h * mu_norm1
print("%g%% %g-day GE Student t VaR = %.6f%%"
      % (lev, h, StudenthVaR1 * np.sqrt(250)))
def test_sprot():
    algn = read_free(sprot_file)
    # truncate alignments to sequence positions with gap frequency
    # no greater than 20%, to avoid over-representation of gaps
    # alignments = truncate(algn, FRAC_ALPHA_CUTOFF)
    pdb_res_list = read_pdb(SPROT_PDB_FILE, 'E')
    msa_algn = msa_search(pdb_res_list, algn)
    print(msa_algn)

    sca_algn = sca(algn)
    algn_shape = get_algn_shape(algn)
    no_pos = algn_shape.no_pos
    no_seq = algn_shape.no_seq
    no_aa = algn_shape.no_aa

    print('Testing SCA module :')
    print('algn_3d_bin hash : ' + str(np.sum(np.square(sca_algn.algn_3d_bin))))
    print('weighted_3d_algn hash : '
          + str(np.sum(np.square(sca_algn.weighted_3d_algn))))
    print('weight hash : ' + str(np.sum(np.square(sca_algn.weight))))
    print('pwX hash : ' + str(np.sum(np.square(sca_algn.pwX))))
    print('pm hash : ' + str(np.sum(np.square(sca_algn.pm))))
    print('Cp hash : ' + str(np.sum(np.square(sca_algn.Cp))))
    print('Cs hash : ' + str(np.sum(np.square(sca_algn.Cs))))

    spect = spectral_decomp(sca_algn, 100)
    print('spect lbd hash : ' + str(np.sum(np.square(spect.pos_lbd))))
    print('spect ev hash : ' + str(np.sum(np.square(spect.pos_ev))))
    print('spect lbd_rnd hash : ' + str(np.sum(np.square(spect.pos_lbd_rnd))))
    print('spect ev_rnd hash : ' + str(np.sum(np.square(spect.pos_ev_rnd))))

    svd_output = LA.svd(sca_algn.pwX)
    U = svd_output[0]
    sv = svd_output[1]
    V = svd_output[2]

    # perform independent component calculations
    kmax = 8
    learnrate = 0.0001
    iterations = 20000
    w = ica(transpose(spect.pos_ev[:, 0:kmax]), learnrate, iterations)
    ic_P = transpose(dot(w, transpose(spect.pos_ev[:, 0:kmax])))
    print("ic_P hash : " + str(mat_sum(square(ic_P))))

    # calculate the matrix Pi = U*V'; this provides a mathematical mapping
    # between positional and sequence correlation
    n_min = min(no_seq, no_pos)
    Pi = dot(U[:, 0:n_min - 1], transpose(V[:, 0:n_min - 1]))
    U_p = dot(Pi, spect.pos_ev)

    p_cutoff = 0.9
    nfit = 3
    cutoffs = zeros((nfit, 1))
    sector_def = []
    for i in range(0, nfit):
        nu, mu, sigma = t.fit(ic_P[:, i])
        q75, q25 = percentile(ic_P[:, i], [75, 25])
        iqr = q75 - q25
        binwidth = 2 * iqr * pow(size(ic_P[:, i]), -1 / 3.0)  # Freedman-Diaconis rule
        nbins = int(round(ptp(ic_P[:, i]) / binwidth))
        yhist, xhist = histogram(ic_P[:, i], nbins)
        x_dist = arange(min(xhist), max(xhist),
                        (max(xhist) - min(xhist)) / 100)
        cdf_jnk = t.cdf(x_dist, nu, mu, sigma)
        pdf_jnk = t.pdf(x_dist, nu, mu, sigma)
        maxpos = argmax(pdf_jnk)
        tail = zeros((1, size(pdf_jnk)))
        if abs(max(ic_P[:, i])) > abs(min(ic_P[:, i])):
            tail[:, maxpos:] = cdf_jnk[maxpos:]
        else:
            tail[:, 0:maxpos] = cdf_jnk[0:maxpos]
        x_dist_pos = argmin(abs(tail - p_cutoff))
        cutoffs[i] = x_dist[x_dist_pos]
        sector_def.append(array(where(ic_P[:, i] > cutoffs[i])[0])[0])
    print(sector_def)
def _parallel_fit_dist(i, data):
    # local import keeps the function self-contained for parallel workers
    from scipy.stats import t
    return i, t.fit(data)
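# --- Usage sketch (added; assumes joblib is available, which this snippet
# does not itself require): fit many series in worker processes.
import numpy as np
from joblib import Parallel, delayed

rng = np.random.default_rng(8)
series = [rng.standard_t(df=5, size=500) for _ in range(8)]
fits = Parallel(n_jobs=2)(
    delayed(_parallel_fit_dist)(i, s) for i, s in enumerate(series))
# each element of `fits` is (index, (df, loc, scale))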
def fit(Y):
    # fit with the degrees of freedom fixed at TFixedDf.fixed_df
    df, m, s = dist.fit(Y, fdf=TFixedDf.fixed_df)
    return np.array([m, np.log(s), np.log(df)])
sigma

# In[416]:

rolling_parameters = [(29, mu[i], s) for i, s in enumerate(sigma)]
VaR_99 = np.array([t.ppf(0.99, *params) for params in rolling_parameters])

# Plot the minimum risk exposure over the 2005-2010 time period
plt.plot(losses.index, 0.01 * VaR_99 * 100000)
plt.show()

# In[427]:

# Fit the Student's t distribution to crisis losses
p = t.fit(losses)

# Compute VaR_99 for the fitted distribution
VaR_99 = t.ppf(0.99, *p)

# Use the fitted parameters and VaR_99 to compute CVaR_99
tail_loss = t.expect(lambda y: y, args=(p[0],), loc=p[1], scale=p[2],
                     lb=VaR_99)
CVaR_99 = (1 / (1 - 0.99)) * tail_loss
print(CVaR_99)  # ~26% loss (CVaR) on the portfolio during the financial crisis
btc_hist = btc_data['returns'].copy()
btc_hist = btc_hist.replace([np.inf, -np.inf], np.nan).dropna(how='all')
ret = np.array(btc_hist)

# Fit the normal distribution N(x; mu, sig): best fit finds mu, stdev
mu_norm, sig_norm = norm.fit(ret)
dx = 0.0001
x = np.arange(min(ret), max(ret), dx)
pdf = norm.pdf(x, mu_norm, sig_norm)
print('Normal mean  = %.5f' % mu_norm)
print('Normal stdev = %.5f' % sig_norm)
print()

# Fit the t-distribution: best fit finds nu (plus loc and scale)
nu, mu_t, sig_t = t.fit(ret)
nu = np.round(nu)
pdf2 = t.pdf(x, nu, mu_t, sig_t)
print('nu = %.2f' % nu)
print()

# Compute VaRs and CVaRs
h = 1.0
# significance 99%
alpha = 0.01
lev = 100.0 * (1 - alpha)
xanu = t.ppf(alpha, nu)

CVaR_n = alpha ** -1 * norm.pdf(norm.ppf(alpha)) * sig_norm - mu_norm
VaR_n = norm.ppf(1 - alpha) * sig_norm - mu_norm