Example #1
def train_single_distribution(data):
    data = np.asarray(data)
    dim = data.shape[1]
    params = []      # one (df, loc, scale) row per dimension
    for i in range(dim):
        X = data[:, i]
        params.append(t.fit(X))  # scipy.stats
    return np.array(params)
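A minimal usage sketch for the function above, assuming numpy and scipy.stats.t are imported as in the snippet; the toy data and sample sizes are illustrative:

# Hypothetical usage: fit each column of a toy 3-dimensional sample,
# then resample from the per-dimension t fits.
import numpy as np
from scipy.stats import t

rng = np.random.default_rng(0)
data = rng.standard_t(df=5, size=(1000, 3))
params = train_single_distribution(data)      # one (df, loc, scale) row per dimension
resampled = np.column_stack([
    t.rvs(df, loc, scale, size=1000, random_state=rng)
    for df, loc, scale in params
])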
Example #2
 def calibrate(self, week_num): # 61 >= week_num >= 53
     mu = [np.mean(item) for item in np.asarray(self.log_return)[:, self.index[week_num-53]:self.index[week_num]]]
     self.mu = mu
     cov_matrix = np.cov(np.asarray(self.log_return)[:, self.index[week_num-53]:self.index[week_num]])
     self.cov_matrix = cov_matrix
     ## Fitted by normal
     X_normal = np.random.multivariate_normal(mu, cov_matrix, 1000)
     L = np.array(
         [-sum(
         self.lambda_dict[self.end_date[week_num - 1]]
         * np.asarray(self.price_list)[:, self.index[week_num]]
         * (np.exp(np.asarray(X_normal[i])) - 1)
         )
         for i in range(len(X_normal[:, 0]))
         ]
     )
     weight = 1 / 15
     L_delta = np.array([sum(-weight * self.V_t[week_num] * np.asarray(X_normal[i]))
                         for i in range(len(X_normal[:, 0]))]
                        )
      VaR = np.percentile(L_delta, 95)  # np.percentile expects q in [0, 100]; 0.95 would be the 0.95th percentile
      ## Fitted by Student's t
     L_act = [-(self.V_t[self.index[i+1]] - self.V_t[self.index[i]]) for i in range(week_num-53, week_num)]
     parameters = t.fit(L_act)
     L_t = t.rvs(parameters[0], parameters[1], parameters[2], 1000)
     return [L, L_delta, L_t]
Example #3
def compute_cvar(losses, ci, distribution='normal'):
    """
    CVaR: measures expected loss given a minimum loss equal to the (theoretical) VaR
    :param losses:
    :param ci:
    :return:
    """
    allowed_distributions = ['normal', 'student']
    assert distribution in allowed_distributions, f'distribution should be in : {allowed_distributions}'

    if distribution == allowed_distributions[0]:
        pm, ps = losses.mean(), losses.std()
        var = norm.ppf(ci, loc=pm, scale=ps)
        tail_loss = norm.expect(lambda x: x, loc=pm, scale=ps, lb=var)
    elif distribution == allowed_distributions[1]:
        fitted = t.fit(losses)
        var = t.ppf(ci, *fitted)
        tail_loss = t.expect(lambda y: y,
                             args=(fitted[0], ),
                             loc=fitted[1],
                             scale=fitted[2],
                             lb=var)
    cvar = (1 / (1 - ci)) * tail_loss

    return cvar
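A short usage example for compute_cvar; the simulated loss sample and confidence level are illustrative, and norm/t are assumed imported from scipy.stats as in the snippet:

# Hypothetical usage: CVaR of a simulated loss sample at 97.5% confidence.
import numpy as np

losses = np.random.default_rng(1).standard_t(df=4, size=5000)
print(compute_cvar(losses, ci=0.975, distribution='student'))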
Example #4
def student_VAR_calculate(his, n, alpha):
    ##Calculate the daily returns of stock
    ret = (his['closing_price'] -
           his['closing_price'].shift(n) * 1.0) / his['closing_price'].shift(n)
    ##Normal distribution best fit, mu_norm=mean, sig_norm=standard deviation
    mu_norm, sig_norm = norm.fit(ret[n:].values)

    # Student t distribution best fit (finding: nu, which is degrees of freedom)
    parm = t.fit(ret[n:].values)
    nu, mu_t, sig_t = parm
    nu = np.round(nu)

    #Set parameters for calculating value at risk from the distribution
    h = 1
    lev = 100 * (1 - alpha)
    xanu = t.ppf(alpha, nu)

    VaR_t = np.sqrt(
        (nu - 2) / nu) * t.ppf(1 - alpha, nu) * sig_norm - h * mu_norm
    #CVaR_t = -1/alpha * (1-nu)**(-1) * (nu-2+xanu**2) * t.pdf(xanu, nu)*sig_norm  - h*mu_norm

    mean_return = mu_norm * n
    mean_standard_dev = sig_norm * np.sqrt(n)

    return mean_return, mean_standard_dev, VaR_t
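A hedged usage sketch; the single-column DataFrame layout with a 'closing_price' field is inferred from the function body, and the price path is synthetic:

# Hypothetical usage with a synthetic price series.
import numpy as np
import pandas as pd

rng = np.random.default_rng(2)
prices = 100 * np.exp(np.cumsum(rng.normal(0, 0.01, 500)))
his = pd.DataFrame({'closing_price': prices})
print(student_VAR_calculate(his, n=1, alpha=0.01))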
Example #5
def copulafit(u, family='gaussian'):
    '''
    Fit copula to data.
    Returns correlation matrix and degrees of freedom for t student
    '''

    rhohat = None  # correlation matrix
    nuhat = None  # degrees of freedom (for t student) 

    if family=='gaussian':
        u[u>=1.0] = 0.999999
        inv_n = ndtri(u)
        rhohat = np.corrcoef(inv_n.T)

    elif family=='t':
        raise NotImplementedError("Not implemented")

        # TODO: does not match the data; does not work (dead code below)
        x = np.linspace(np.min(u), np.max(u), 100)
        inv_t = np.ndarray((len(x), u.shape[1]))

        for j in range(u.shape[1]):
            param = t.fit(u[:,j])  # (df, loc, scale)
            t_pdf = t.pdf(x, param[0], loc=param[1], scale=param[2])
            inv_t[:,j] = t_pdf

        # TODO: correlation? nuhat?
        rhohat = np.corrcoef(inv_t.T)
        nuhat = None

    else:
        raise ValueError("Wrong family parameter. Use 'gaussian' or 't'")

    return rhohat, nuhat
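A hedged usage sketch for the Gaussian branch; converting raw data to pseudo-observations via ranks is one common convention, not something the snippet itself prescribes:

# Hypothetical usage: fit a Gaussian copula to rank-based pseudo-observations.
import numpy as np
from scipy.stats import rankdata

x = np.random.default_rng(3).multivariate_normal([0, 0], [[1, 0.6], [0.6, 1]], 2000)
u = np.column_stack([rankdata(col) / (len(col) + 1) for col in x.T])  # values in (0, 1)
rhohat, nuhat = copulafit(u, family='gaussian')
print(rhohat)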
Example #6
 def _fit(self, X):
     df, loc, scale = t.fit(X)
     self._params = {
         'df': df,
         'loc': loc,
         'scale': scale
     }
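A self-contained sketch of the same pattern; the class name and the sample method are assumptions, only the _fit body and the _params layout come from the snippet:

# Minimal sketch: store t.fit results, then sample from the fitted distribution.
import numpy as np
from scipy.stats import t

class UnivariateT:
    def _fit(self, X):
        df, loc, scale = t.fit(X)
        self._params = {'df': df, 'loc': loc, 'scale': scale}

    def sample(self, n, seed=None):
        p = self._params
        return t.rvs(p['df'], loc=p['loc'], scale=p['scale'],
                     size=n, random_state=seed)

model = UnivariateT()
model._fit(np.random.default_rng(4).standard_t(df=6, size=500))
print(model.sample(5, seed=0))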
Example #7
 def CVaR_t(self, alpha, h):
     """ h is number of days """
     self.nu, self.mu, self.sigma = t.fit(self.rets)
     mu_h, sigma_h = self.mu * (h/len(self.rets)), self.sigma * ((h/len(self.rets)) ** 0.5)
     self.nu = np.round(self.nu)
     xanu    = t.ppf(alpha, self.nu)
     CVAR    = self.CVaR_t_stats(alpha, 1, self.nu, xanu, mu_h, sigma_h)
     return pd.DataFrame({"CVaR (t-statistics)": CVAR}, index = {"Confidence {0:.1f}%".format(100-alpha*100)})
Example #8
def duplicates():
    df = get_duplicate_data()
    df['error'] = df["POSIX_AGG_PERF_BY_SLOWEST_LOG10"] - df["prediction"]

    df = df[np.abs(df.error) < np.log10(1.5)]
    df.time_diff = np.log10(df.time_diff + 0.01)
    # df.error = np.abs(df.error)

    cuts = [-np.inf] + list(range(9))
    groups = [
        df[(df.time_diff >= low) & (df.time_diff < high)].error
        for low, high in zip(cuts[:-1], cuts[1:])
    ]

    # fit a student t distribution
    from scipy.stats import t
    param = t.fit(groups[0])
    norm_gen_data = t.rvs(param[0], param[1], param[2], 10000)

    groups = list(reversed([norm_gen_data] + groups))
    labels = list(
        reversed(["t-distribution fit", "0s to 1s"] + [
            "$10^{}s$ to $10^{}s$".format(low, high)
            for low, high in zip(cuts[1:-1], cuts[2:])
        ]))

    fig, axes = joypy.joyplot(groups,
                              colormap=matplotlib.cm.coolwarm_r,
                              overlap=0.3,
                              linewidth=1.,
                              ylim='own',
                              range_style='own',
                              tails=0.2,
                              bins=100,
                              labels=labels,
                              figsize=(2.5, 3))

    for idx, ax in enumerate(axes):
        try:
            ax.set_yticklabels([labels[idx]], fontsize=8, rotation=120)
        except Exception:
            pass
        ax.set_xlim([-0.2, 0.2])
        ax.set_xticks(np.log10([1 / 1.5, 1 / 1.2, 1, 1.2, 1.5]))
        ax.set_xticklabels([
            "$.67\\times$", "$.83\\times$", "$1\\times$", "$1.2\\times$",
            "$1.5\\times$"
        ],
                           rotation=90,
                           fontsize=8)

    plt.xlabel("Error", rotation=180)
    plt.ylabel("Time ranges")

    plt.savefig("figures/figure_5.pdf",
                dpi=600,
                bbox_inches='tight',
                pad_inches=0)
Example #9
 def __init__(self, mode=0, elem=None, sample=None):
     if mode == 0:
         self.df = elem[0]
         self.mu = elem[1]
         self.sigma = elem[2]
     else:
         self.df, self.mu, self.sigma = t.fit(sample)
     self.math_average = t.mean(self.df, loc=self.mu, scale=self.sigma)
     self.dispersion = t.var(self.df, loc=self.mu, scale=self.sigma)
Example #10
 def VaR_t(self, alpha, h, verbose):
     """ h is number of days """
     self.nu, self.mu, self.sigma = t.fit(self.rets)
     mu_h, sigma_h = self.mu * (h/len(self.rets)), self.sigma * ((h/len(self.rets)) ** 0.5)
      VAR           = self.VaR_t_stats(alpha, h, self.nu, mu_h, sigma_h)  # horizon-scaled mean, as in CVaR_t
     if verbose:
         return pd.DataFrame({"VaR (t-statistics)": VAR}, index = {"Confidence {0:.1f}%".format(100-alpha*100)})
     else:
         return VAR
Example #11
def show_generation_result():

    # Define 2d model for cycle length and cycle length variability
    random.seed(19101985)
    np.random.seed(28111987)
    plt.style.use('seaborn-talk')
    fig_x, fig_y = 6, 6
    x, y = np.mgrid[20:40:0.1, 0:7:0.05]
    par_space_points = np.dstack((x, y))
    toy_generator = Generator(gen='double_gamma')
    x_range, y_range = (25, 40), (1, 5)
    z = toy_generator.get_pdf(par_space_points)

    # Generate a few points in this parameter space
    data = toy_generator.generate_people(10)

    # Plotting
    with PdfPages('plots/multivariate_test.pdf') as pages:

        # 2D model
        fig = plt.figure(1, figsize=(fig_x, fig_y))
        ax = fig.add_subplot(111)
        ax.contour(x, y, z, 25, linewidths=0.7, cmap=plt.cm.Greys, linestyles='dashed', zorder=0)
        for x, y, name, color in zip(data['true_cl'], data['true_clv'], data['name'], data['color']):
            plotting.add_point(ax=ax, pos=(x, y), x_range=x_range, y_range=y_range, name=name, color=color)
        plotting.set_axis_info(ax, x_range=x_range, y_range=y_range,
                               title_x='Cycle Length', title_y='Cycle Length Variability')
        pages.savefig(fig, bbox_inches='tight', pad_inches=0.1)
        plt.close()

        # Individual gaussians
        for index, row in data.iterrows():
            fig = plt.figure(1, figsize=(fig_x, fig_y))
            ax = fig.add_subplot(111)
            vals, color = row['cls'], row['color']
            kwargs = dict(color=color, linewidth=1.5)
            counts, bins, patches = ax.hist(vals, density=True, histtype='stepfilled', bins=40, alpha=0.3, **kwargs)
            bin_cs = np.array(math_ops.get_bin_centers(bins))
            authors, labels = [patches[0]], ['Generated values ({})'.format(len(vals))]
            try:
                bf_pars = t.fit(vals)
                best_fit, = ax.plot(bin_cs, t.pdf(bin_cs, *bf_pars), linestyle='--', **kwargs)
                nothing = plotting.get_dummy_element()
                authors += [nothing, best_fit, nothing]
                labels += ['', 'Best fit', r"$\mu' = {1:.2f}, \sigma' = {2:.2f}, \nu' = {0:.1f}$".format(*bf_pars)]
                true_pars = (row['true_cl'], row['true_clv'])
                true_vals, = ax.plot(bin_cs, norm.pdf(bin_cs, *true_pars), linestyle='-', **kwargs)
                authors += [nothing, true_vals, nothing]
                labels += ['', 'True distribution', r"$\mu = {0:.2f}, \sigma = {1:.2f}$".format(*true_pars)]
            except RuntimeError:
                print('Fit failed ---> ', len(vals))
            plotting.set_axis_info(ax, title_x='Cycle Length', title_y='Arbitrary Units')
            ax.legend(authors, labels, loc='upper left', fontsize=10, framealpha=0.1)
            pages.savefig(fig, bbox_inches='tight', pad_inches=0.1)
            plt.close()
Example #12
def historical_VaR(returns, rolling_size, alpha=0.05):
    """
    Computing 20 days realized Value at Risk.
    """
    realized_volatility = returns.rolling(rolling_size).std()
    realized_mean = returns.rolling(rolling_size).mean()

    # Value at Risk based on realized data with t distribution assumption
    df = t.fit(returns)[0]
    z_t = t.ppf([alpha], df=df)
    return realized_mean[:-1] + realized_volatility[:-1] * z_t
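A hedged usage example with a synthetic daily-returns Series; the 20-day window mirrors the docstring:

# Hypothetical usage on simulated returns.
import numpy as np
import pandas as pd

returns = pd.Series(0.01 * np.random.default_rng(5).standard_t(df=5, size=500))
print(historical_VaR(returns, rolling_size=20).tail())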
Example #13
File: rec.py Project: ufda/btgym
    def fit(self, trajectory=None):
        """
        Fits parameters to currently stored or provided data.

        Args:
            trajectory:     array_like, data to fit or None

        Returns:
            fitted parameters: df, loc, scale
        """
        if trajectory is None:
            if self.df is None:
                self.df, self.loc, self.scale = student_t.fit(self.trajectory[-self.mask_idx:])
                self.df = np.clip(self.df, 3, None)

            return self.df, self.loc, self.scale

        else:
            df, loc, scale = student_t.fit(trajectory)
            df = np.clip(df, 3, None)

            return df, loc, scale
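The np.clip(df, 3, None) guard presumably keeps the fitted degrees of freedom out of the heavy-tail regime: a Student's t has infinite variance for df <= 2, so clipping at 3 guarantees that downstream variance-based calculations stay finite.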
Example #14
def naiveTopt(icm, cutoff=.05):  # like topt but doesn't correct for tail direction
    """ Returns cluster by fitting a t distribution and returning residues beyond the cutoff """
    
    param = t.fit(icm,loc=np.median(icm))
    x = np.linspace(-1,1,200)
    cdf = t.cdf(x,param[0],loc=param[1], scale=param[2])

    minx = np.max(x[np.nonzero(cdf<cutoff)])

    # lower tail only (no direction correction):
    cursect = np.array([i for i in range(icm.size) if icm[i] < minx])
        
    return cursect
Example #15
    def calc_risk_t(self, confidence=0.95):
        port_returns = self.returns.dot(self.allocation)
        losses = -port_returns.iloc[:, 0]

        params = t.fit(losses)
        VaR = t.ppf(confidence, *params)

        tail_loss = t.expect(lambda y: y,
                             args=(params[0], ),
                             loc=params[1],
                             scale=params[2],
                             lb=VaR)
        CVaR = (1 / (1 - confidence)) * tail_loss

        return losses, VaR, CVaR
Example #16
    def initialize(y: np.ndarray):
        """ Function that calculates the starting values, for each distributional parameter individually.

        y: np.ndarray
            Data from which starting values are calculated.

        """
        nu_fit, loc_fit, scale_fit = student_t.fit(y)
        location_init = StudentT.param_dict_inv()["location_inv"](loc_fit)
        scale_init = StudentT.param_dict_inv()["scale_inv"](scale_fit)
        nu_init = StudentT.param_dict_inv()["nu_inv"](nu_fit)

        start_values = np.array([location_init, scale_init, nu_init])

        return start_values
Example #17
def calc_CVar(Df, alpha=0.05, dist='n'):
    rtn = Df / Df.shift(1) - 1
    if dist == 'n':
        mu, sig = norm.fit(rtn.dropna().values)
        mu = mu * 252
        sig = sig * 252**(0.5)
        CVar = alpha**(-1.) * norm.pdf(norm.ppf(alpha)) * sig - mu
    if dist == 't':
        nu, mu, sig = t.fit(rtn.dropna().values)
        mu = mu * 252
        sig = sig * 252**(0.5)
        xanu = t.ppf(alpha, nu)
        CVar = -1. / alpha * (1 - nu)**(-1.) * (nu - 2 + xanu**2.) * t.pdf(
            xanu, nu) * sig - mu
    return CVar
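A hedged usage example; the price series is synthetic, and norm/t are assumed imported from scipy.stats as elsewhere on this page:

# Hypothetical usage: annualized CVaR under a t assumption.
import numpy as np
import pandas as pd

prices = pd.Series(100 * np.exp(np.cumsum(np.random.default_rng(6).normal(0, 0.01, 500))))
print(calc_CVar(prices, alpha=0.05, dist='t'))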
Example #18
def wavelet_values(ax):
    
    model_name = 'vgg16'
    new_size = 50

    wavelet_data_file = 'Results/'+model_name+'_wavelet_ssm_'+str(new_size)+'.h5'

    results = h5py.File(wavelet_data_file, 'r')

    model_layers = results.keys()
    model_layers = [l for l in model_layers if l.split('_')[0].isdigit()]
    model_layers.sort(key=lambda x: int(x.split('_')[0]))

    # r_vals = []
    # for l in layers:
    #     r_vals.append(data[l+'/ssm/r'].value)
    BONF_CRE = len(model_layers)
    POOL_INDICES = np.array([0,3,6,10,14,18])

    ssm = np.array([results[l]['ssm/r'][()] for l in model_layers])  # [()] reads a scalar h5py dataset
    shuffles = [results[l]['ssm/rshuffles'] for l in model_layers]
    t_shuffles = [t.fit(s) for s in shuffles]
    # norm_shuffles = [norm.fit(s) for s in shuffles]

    p_vals = np.array([1.0 - t.cdf(s, *tparams) for s,tparams in zip(ssm, t_shuffles)])
    # norm_p_vals = [1.0 - norm.cdf(s, *tparams) for s,tparams in zip(ssm, norm_shuffles)]

    shuffle_mean_ssm = [np.mean(results[l]['ssm/rshuffles'][()]) for l in model_layers]
    shuffle_std_ssm = [np.std(results[l]['ssm/rshuffles'][()]) for l in model_layers]

    shuffle_min_ssm = np.array(shuffle_mean_ssm) - np.array(shuffle_std_ssm)
    shuffle_max_ssm = np.array(shuffle_mean_ssm) + np.array(shuffle_std_ssm)


    ssm = ssm[POOL_INDICES]
    p_vals = p_vals[POOL_INDICES]
    shuffle_min_ssm = shuffle_min_ssm[POOL_INDICES]
    shuffle_max_ssm = shuffle_max_ssm[POOL_INDICES]


    sig = np.array([p < ALPHA/BONF_CRE for p in p_vals])
    sig_index = np.where(sig)[0]

    ax.plot(sig_index, ssm[sig_index], 'o',color='k', markersize=5, alpha=0.5)

    ax.plot(ssm, '--', color='k', linewidth=3, alpha=0.5)
Example #19
def topt(icm, cutoff=.05):
    """ Returns cluster by fitting a t distribution and returning residues beyond the cutoff """
    
    param = t.fit(icm,loc=np.median(icm))
    x = np.linspace(-1,1,200)
    cdf = t.cdf(x,param[0],loc=param[1], scale=param[2])

    minx = np.max(x[np.nonzero(cdf<cutoff)])

    # deal with direction of tail:
    if icm[np.nonzero(np.abs(icm)==np.max(np.abs(icm)))]<0:
        cursect = np.array([i for i in range(icm.size) if icm[i]<minx])
    else:
        maxx = np.min(x[np.nonzero(cdf>(1-cutoff))])
        cursect = np.array([i for i in range(icm.size) if icm[i]>maxx])
        
    return cursect
Example #20
def VaR(r_0, data, time, percentile=0.99, upward=True):
    # Model calibration
    k, mu, sigma = model_calibration(data)

    # VaR calculation
    expectation = r_0 * np.exp(-k * time) + mu * (1 - np.exp(-k * time))
    variance = (sigma**2 / (2 * k)) * (1 - np.exp(-2 * k * time))  # OU variance: sigma^2/(2k) * (1 - e^{-2kt})

    # Estimate the degrees of freedom for the t-distribution
    t_df = tdistr.fit(data)[0]

    if upward:
        return np.ravel(expectation +
                        np.sqrt(variance) * tdistr.ppf(percentile, t_df))
    else:
        return np.ravel(expectation -
                        np.sqrt(variance) * tdistr.ppf(percentile, t_df))
Example #21
    def fit(data: FloatIterable,
            nu: float = None,
            mu: float = 0,
            sigma: float = 1) -> 'StudentsT':
        """
        Fit a Students-T distribution to the data.

        :param data: Iterable of data to fit to.
        :param nu: Optional fixed value for degrees of freedom.
        :param mu: Optional fixed value for mean. Default is 0.
        :param sigma: Optional fixed value for standard deviation. Default is 1
        """
        kwargs = {}
        for arg, kw in zip((nu, mu, sigma), ('fdf', 'floc', 'fscale')):
            if arg is not None:
                kwargs[kw] = arg
        nu, mu, sigma = t.fit(data=data, **kwargs)
        return StudentsT(nu=nu, mu=mu, sigma=sigma)
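A note on the f-prefixed keywords: scipy's fit treats fdf, floc and fscale as values to hold fixed during maximum-likelihood estimation, so only the remaining parameters are optimized. With the defaults above (mu=0, sigma=1), only the degrees of freedom are fitted unless the caller overrides them.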
Example #22
def get_dists(lr, lb):
    kde_dict = dict()
    t_dict = dict()
    norm_dict = dict()
    for i in range(3):
        temp = lr[lb == i]
        xr = np.linspace(lr.min(), lr.max(), 1000)

        kde = gaussian_kde(temp)
        y = kde(xr)
        kde_dict[str(i)] = y

        t_pdf = t.pdf(xr, *t.fit(temp))
        n_pdf = norm.pdf(xr, *norm.fit(temp))
        t_dict[str(i)] = t_pdf
        norm_dict[str(i)] = n_pdf

    return xr, kde_dict, t_dict, norm_dict
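A hedged usage sketch; the three-cluster labels and synthetic returns are illustrative, and gaussian_kde/norm/t are assumed imported from scipy.stats:

# Hypothetical usage: per-cluster KDE, t and normal density estimates.
import numpy as np

rng = np.random.default_rng(7)
lr = rng.standard_t(df=5, size=900)
lb = rng.integers(0, 3, size=900)
xr, kde_dict, t_dict, norm_dict = get_dists(lr, lb)
print(len(xr), sorted(t_dict))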
Example #23
def is_t_distributed(X, K=500):
    # get t parameters
    nu, mu, sigma = t.fit(X, loc=X.mean(), scale=X.std())
    # Kolmogorov-Smirnoff of original sample
    stat0, _ = kstest(X.to_numpy(), t.cdf, args=(nu, mu, sigma))  # include loc/scale so the reference matches the fit

    # distribution
    d = []
    for k in range(K):
        # generate
        tsample = t.rvs(nu, loc=mu, scale=sigma, size=X.shape[0])
        # KS
        stat, _ = kstest(tsample, t.cdf, args=(nu, mu, sigma))
        d.append(stat)
    d = np.array(d)

    # compute pvalue
    pvalue = (np.sum(d > stat0) + 1) / (d.shape[0] + 1)
    return Distr(pvalue)
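Re-running the KS test on K samples drawn from the fitted distribution is a parametric-bootstrap (Lilliefors-style) correction: because the parameters were estimated from X itself, comparing stat0 against the standard KS tables would be anti-conservative.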
Example #24
def error_model(data):
    # fit a Student's t distribution
    df, mu, std = t.fit(data, floc=0)

    # fit a normal distribution
    mu1, std1 = norm.fit(data, floc=0)

    print('t dist parameters are', df, mu, std)
    print('Gaussian parameters are', mu1, std1)

    plt.figure()
    # Plot the histogram.
    plt.hist(data, bins=25, density=True, alpha=0.5)

    # Plot the PDF.
    xmin, xmax = plt.xlim()
    x = np.linspace(xmin, xmax, 100)
    p1 = norm.pdf(x, loc=mu1, scale=std1)
    p = t.pdf(x, df, loc=mu, scale=std)
    kde_data = data[:, np.newaxis]
    X_plot = np.linspace(xmin, xmax, 1000)[:, np.newaxis]

    # Gaussian KDE
    kde = KernelDensity(kernel='gaussian', bandwidth=0.035).fit(kde_data)
    log_dens = kde.score_samples(X_plot)
    #plt.plot(X_plot[:, 0], np.exp(log_dens), linewidth = 2)

    plt.plot(x, p, 'k', linewidth=2, label='t-distribution')
    plt.plot(x, p1, 'r', linewidth=2, label='Gaussian distribution')
    title = "Fit results: mu = %.2f,  std = %.2f" % (mu, std)
    #plt.title(title)
    plt.xlabel('Normalized Error', fontsize=16)
    plt.ylabel('Probability of Error', fontsize=16)
    #plt.ylim(0,6)
    #plt.title('Sensor 1', fontsize = 17)
    plt.legend()
    plt.show()
Example #25
def compute_theoretical_var(losses, ci, distribution='normal'):
    """
    Compute theoretical VaR (Value at Risk) at a given confidence level
    :param losses:
    :param ci:
    :return:
    """
    allowed_distributions = ['normal', 'student', 'gaussian_kde']
    assert distribution in allowed_distributions, f'distribution should be in : {allowed_distributions}'

    if distribution == allowed_distributions[0]:
        pm, ps = losses.mean(), losses.std()
        res = norm.ppf(ci, loc=pm, scale=ps)
    elif distribution == allowed_distributions[1]:
        fitted = t.fit(losses)
        res = t.ppf(ci, *fitted)
    elif distribution == allowed_distributions[2]:
        fitted = gaussian_kde(losses)
        sample = fitted.resample(100000)
        res = np.quantile(sample, ci)

    return res
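A short usage example; the loss sample is simulated, and norm/t/gaussian_kde are assumed imported from scipy.stats:

# Hypothetical usage: 99% VaR under each supported distribution.
import numpy as np

losses = np.random.default_rng(8).standard_t(df=4, size=5000)
for dist in ('normal', 'student', 'gaussian_kde'):
    print(dist, compute_theoretical_var(losses, ci=0.99, distribution=dist))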
Example #26
# QQ plot using Normal Dist
nJDmean, nJDsigma = norm.fit(past_log_stk["JD"])
stats.probplot(past_log_stk["JD"], dist="norm", plot=pylab)
plt.title("JD's QQ plot using Normal distribution")
plt.show()
nBAmean, nBAsigma = norm.fit(past_log_stk["BABA"])
stats.probplot(past_log_stk["BABA"], dist="norm", plot=pylab)
plt.title("BABA's QQ plot using Normal distribution")
plt.show()
nIVVmean, nIVVsigma = norm.fit(past_log_IVV)
stats.probplot(past_log_IVV, dist='norm', plot=pylab)
plt.title("IVV's QQ plot using Normal distribution")
plt.show()

# QQ plot using T Dist
JDdf, tJDmean, tJDsigma = t.fit(past_log_stk["JD"])
stats.probplot(past_log_stk["JD"], JDdf, dist="t", plot=pylab)
plt.title("JD's QQ plot using T-distribution")
plt.show()
BAdf, tBAmean, tBAsigma = t.fit(past_log_stk["BABA"])
stats.probplot(past_log_stk["BABA"], BAdf, dist="t", plot=pylab)
plt.title("BABA's QQ plot using T-distribution")
plt.show()
IVVdf, tIVVmean, tIVVsigma = t.fit(past_log_IVV)
stats.probplot(past_log_IVV, IVVdf, dist="t", plot=pylab)
plt.title("IVV's QQ plot using T-distribution")
plt.show()

# Boxplot
fig2, ax2 = plt.subplots(nrows=1, ncols=3, figsize=(13, 5))
past_log_stk["JD"].plot.box(fontsize=7,
### Sample the nwl * (nwl-1)/2 normal distributions
Zarr = np.zeros(ncomb)
for m in range(ncomb):
    sigrand = sigarr[m]
#    sigrand = 10
#    sigrand = sigarr_expon[m]
#    sigrand = sigarr_div[m]
#    sigrand = np.random.choice(sigarr_expon)
    Xrand = norm.rvs(loc=0, scale=sigrand)  # single draw (scalar)
    Zarr[m] = Xrand

### Fit a Cauchy distribution
loc,sca = cauchy.fit(Zarr)
locnorm, scanorm = norm.fit(Zarr)
dft, loct, scat = t.fit(Zarr)

### Compound distribution
#sigarr[:] = sigrand
#weights = 1/sigarr_expon
#weights = weights / np.sum(weights)
weights = np.ones_like(sigarr)
pdf_cmb = lambda x: np.sum(weights * 1/sigarr * 1/np.sqrt(2*np.pi) * np.exp(-1/2*x**2/sigarr**2))
#pdf_cmb  = lambda x: np.sum(weights * 1/sigarr_expon * 1/np.sqrt(2*np.pi) * np.exp(-1/2*x**2/sigarr_expon**2))
#pdf_cmb  = lambda x: np.sum(weights * 1/sigarr_div * 1/np.sqrt(2*np.pi) * np.exp(-1/2*x**2/sigarr_div**2))

### Buhlmann
#v2 = np.var(sigarr)


Example #28
import numpy as np
import pylab as pl
from scipy.stats import t, laplace, norm

a = np.random.randn(30)
outliers = np.array([8, 8.75, 9.5])
pl.hist(a, 7, weights=[1 / 30] * 30, rwidth=0.8)

#fit without outliers
x = np.linspace(-5, 10, 500)

loc, scale = norm.fit(a)
n = norm.pdf(x, loc=loc, scale=scale)

loc, scale = laplace.fit(a)
l = laplace.pdf(x, loc=loc, scale=scale)

fd, loc, scale = t.fit(a)
s = t.pdf(x, fd, loc=loc, scale=scale)
pl.plot(x, n, 'k>',
        x, s, 'r-',
        x, l, 'b--')
pl.legend(('Gauss', 'Student', 'Laplace'))
pl.savefig('robustDemo_without_outliers.png')

#add the outliers
pl.figure()
pl.hist(a, 7, weights=[1 / 33] * 30, rwidth=0.8)
pl.hist(outliers, 3, weights=[1 / 33] * 3, rwidth=0.8)
aa = np.hstack((a, outliers))

loc, scale = norm.fit(aa)
n = norm.pdf(x, loc=loc, scale=scale)
Example #29
WMT_rets = WMT_rets.sort_values(axis=0, ascending=True)
VaR_WMT = -WMT_rets.quantile(0.05) * np.sqrt(250)
VaR = [VaR_GE, VaR_GOOG, VaR_PG, VaR_WMT]

#%%For parametric
VaR_GE_p = (-GE.mean() + 1.65 * GE.std()) * np.sqrt(250)
VaR_GOOG_p = (-GOOG.mean() + 1.65 * GOOG.std()) * np.sqrt(250)
VaR_PG_p = (-PG.mean() + 1.65 * PG.std()) * np.sqrt(250)
VaR_WMT_p = (-WMT.mean() + 1.65 * WMT.std()) * np.sqrt(250)
#%%t distribution

from scipy.stats import skew, kurtosis, kurtosistest
from scipy.stats import norm, t
dx = 0.0001  # resolution
x1 = np.linspace(GE.min(), GE.max(), len(GE))
parm1 = t.fit(GE_rets)
nu1, mu_t1, sig_t1 = parm1
pdf1 = t.pdf(x1, nu1, mu_t1, sig_t1)
print("Integral t.pdf(x1; mu1, sig1) dx = %.2f" % (np.sum(pdf1 * dx)))
print("nu1 = %.2f" % nu1)
print()
# Compute VaR
alpha = 0.05
lev = 100 * (1 - alpha)
mu_norm1, sig_norm1 = norm.fit(GE_rets)
h = 1  # days
StudenthVaR1 = (h * (nu1 - 2) / nu1)**0.5 * t.ppf(
    1 - alpha, nu1) * sig_norm1 - h * mu_norm1
print("%g%% %g-day GE Student t VaR = %.6f%%" %
      (lev, h, StudenthVaR1 * np.sqrt(250)))
Example #30
def test_sprot():
    algn = read_free(sprot_file)
    # truncate alignments to sequence positions with
    # gap frequency no greater than 20% - to avoid over-representation of gaps
    # alignments = truncate(algn, FRAC_ALPHA_CUTOFF)
    # print alignments.shape
    pdb_res_list = read_pdb(SPROT_PDB_FILE, 'E')
    msa_algn = msa_search(pdb_res_list, algn)
    print(msa_algn)
    sca_algn = sca(algn)
    algn_shape = get_algn_shape(algn)
    no_pos = algn_shape.no_pos
    no_seq = algn_shape.no_seq
    no_aa = algn_shape.no_aa

    print('Testing SCA module :')
    print('algn_3d_bin hash :' + str(np.sum(np.square(sca_algn.algn_3d_bin))))
    print('weighted_3d_algn hash :' +
          str(np.sum(np.square(sca_algn.weighted_3d_algn))))
    print('weight hash : ' + str(np.sum(np.square(sca_algn.weight))))
    print('pwX hash : ' + str(np.sum(np.square(sca_algn.pwX))))
    print('pm hash : ' + str(np.sum(np.square(sca_algn.pm))))
    print('Cp hash : ' + str(np.sum(np.square(sca_algn.Cp))))
    print('Cs hash : ' + str(np.sum(np.square(sca_algn.Cs))))
    spect = spectral_decomp(sca_algn, 100)
    print('spect lb hash : ' + str(np.sum(np.square(spect.pos_lbd))))
    print('spect ev hash : ' + str(np.sum(np.square(spect.pos_ev))))
    print('spect lbd_rnd hash : ' + str(np.sum(np.square(spect.pos_lbd_rnd))))
    print('spect ev_rnd hash : ' + str(np.sum(np.square(spect.pos_ev_rnd))))

    svd_output = LA.svd(sca_algn.pwX)
    U = svd_output[0]
    sv = svd_output[1]
    V = svd_output[2]

    # perform independent components calculations
    kmax = 8
    learnrate = 0.0001
    iterations = 20000
    w = ica(transpose(spect.pos_ev[:, 0:kmax]), learnrate, iterations)
    ic_P = transpose(dot(w, transpose(spect.pos_ev[:, 0:kmax])))

    print "ic_P hash :" + str(mat_sum(square(ic_P)))
    # calculate the matrix Pi = U*V'
    # this provides a mathematical mapping between
    # positional and sequence correlation

    n_min = min(no_seq, no_pos)
    Pi = dot(U[:, 0:n_min-1], transpose(V[:, 0:n_min-1]))
    U_p = dot(Pi, spect.pos_ev)

    p_cutoff = 0.9
    nfit = 3
    cutoffs = zeros((nfit, 1))
    sector_def = []

    for i in range(0, nfit):
        nu, mu, sigma = t.fit(ic_P[:, i])
        q75, q25 = percentile(ic_P[:, i], [75, 25])
        iqr = q75 - q25
        binwidth = 2*iqr*pow(size(ic_P[:, i]), -1/3.0)  # Freedman-Diaconis rule
        nbins = round(ptp(ic_P[:, i])/binwidth)
        yhist, xhist = histogram(ic_P[:, i], nbins)
        x_dist = arange(min(xhist), max(xhist), (max(xhist) - min(xhist))/100)
        cdf_jnk = t.cdf(x_dist, nu, mu, sigma)
        pdf_jnk = t.pdf(x_dist, nu, mu, sigma)
        maxpos = argmax(pdf_jnk)
        tail = zeros((1, size(pdf_jnk)))
        if abs(max(ic_P[:, i])) > abs(min(ic_P[:, i])):
            tail[:, maxpos:] = cdf_jnk[maxpos:]
        else:
            tail[:, 0:maxpos] = cdf_jnk[0:maxpos]
        x_dist_pos = argmin(abs(tail - p_cutoff))
        cutoffs[i] = x_dist[x_dist_pos]
        sector_def.append(array(where(ic_P[:, i] > cutoffs[i])[0])[0])
    print(sector_def)
Example #31
def _parallel_fit_dist(i, data):
    from scipy.stats import t
    return i, t.fit(data)
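A hedged usage sketch showing the intended parallel pattern; the pool size and the toy columns are assumptions:

# Hypothetical usage: fit several series in parallel and collect (index, params).
import numpy as np
from multiprocessing import Pool

if __name__ == '__main__':
    columns = [np.random.default_rng(i).standard_t(df=5, size=300) for i in range(4)]
    with Pool(2) as pool:
        fitted = dict(pool.starmap(_parallel_fit_dist, enumerate(columns)))
    print(fitted[0])  # (df, loc, scale) for the first series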
Example #32
 def fit(Y):
     df, m, s = dist.fit(Y, fdf=TFixedDf.fixed_df)
     return np.array([m, np.log(s), np.log(df)])
Example #33

# In[416]:

rolling_parameters = [(29, mu[i], s) for i, s in enumerate(sigma)]
VaR_99 = np.array([t.ppf(0.99, *params) for params in rolling_parameters])

# Plot the minimum risk exposure over the 2005-2010 time period
plt.plot(losses.index, 0.01 * VaR_99 * 100000)
plt.show()

# In[427]:

# Fit the Student's t distribution to crisis losses
p = t.fit(losses)

# Compute the VaR_99 for the fitted distribution
VaR_99 = t.ppf(0.99, *p)

# Use the fitted parameters and VaR_99 to compute CVaR_99
tail_loss = t.expect(lambda y: y,
                     args=(p[0], ),
                     loc=p[1],
                     scale=p[2],
                     lb=VaR_99)
CVaR_99 = (1 / (1 - 0.99)) * tail_loss
print(CVaR_99)

# 26% Loss (CVaR) on a given portfolio investment during financial crisis
Example #34
btc_hist = btc_data['returns'].copy()
btc_hist = btc_hist.replace([np.inf, -np.inf], np.nan).dropna(how='all')

ret = np.array(btc_hist)

# Fit the normal distribution N(x; mu, sig) - best fit (finding: mu, stdev)
mu_norm, sig_norm = norm.fit(ret)
dx = 0.0001
x = np.arange(min(ret), max(ret), dx)
pdf = norm.pdf(x, mu_norm, sig_norm)
print('Normal mean  = %.5f' % mu_norm)
print('Normal stdev = %.5f' % sig_norm)
print()

# Fit the t-distribution - best fit (finding: nu)
parm = t.fit(ret)
nu, mu_t, sig_t = parm
nu = np.round(nu)
pdf2 = t.pdf(x, nu, mu_t, sig_t)
print('nu = %.2f' % nu)
print()

# Compute VaRs and CVaRs
h = 1.0
# significance 99%
alpha = 0.01
lev = 100.0*(1-alpha)
xanu = t.ppf(alpha, nu)

CVaR_n = alpha**-1 * norm.pdf(norm.ppf(alpha))*sig_norm - mu_norm
VaR_n = norm.ppf(1-alpha)*sig_norm - mu_norm