Example #1
File: t.py Project: ronrest/pyrpy
def dt(x, df=1, loc=0, scale=1, ncp=None, log=False):
    """
    Density Function for the t distribution.
    Returns the probability density value at the value x.

    ARGS:
    ---------------
    :param x (float, array of floats):
        The value(s) of x
    :param df (float):
        degrees of freedom
    :param loc: array_like, optional
        location parameter (default=0)
    :param scale: float, optional
        scale (default=1)
    :param ncp (float):
        non-centrality parameter delta.
        Currently not implemented.
    :param log (bool):
        take the log?


    RETURN:
    ---------------
    :return:        returns an array of density values
    """
    # ==========================================================================
    if log:
        return t.logpdf(x, df=df, loc=loc, scale=scale)
    else:
        return t.pdf(x, df=df, loc=loc, scale=scale)
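A minimal usage sketch, assuming the imports the snippet relies on but does not show (numpy and scipy.stats.t):

import numpy as np
from scipy.stats import t

print(dt(0.0, df=5))                                    # density at 0 with 5 degrees of freedom
print(dt(np.array([-1.0, 0.0, 1.0]), df=5, log=True))   # log-densities at several points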
Example #2
def GeneratePDF(Data, method = 'Robust_Student_t', lower_threshold = 0.15, upper_threshold = 0.85):
    
    '''Generate the pdf estimate of the data
    Input: /Data/   data to estimate pdf on
           /method/ Method of estimation.
                    Available methods: 'Robust_Student_t'; 'KDE'; 'Normal'
           /lower_threshold/ in percentage
           /upper_threshold/ in percentage
    Output: /pdf/   fitted pdf
            /cdf/   fitted cdf
    '''
    x = np.linspace(min(Data), max(Data), 100)
    if method == 'Robust_Student_t':
        nu, mu, sigma = uvtfit(Data)
        pdf = t.pdf(x, nu, mu, sigma)
        cdf = t.cdf(x, nu, mu, sigma)
        lower = t.ppf(lower_threshold, nu, mu, sigma)
        upper = t.ppf(upper_threshold, nu, mu, sigma)
        
    elif method == 'Normal':
        mu, sigma = norm.fit(Data)
        pdf = norm.pdf(x, mu, sigma)
        cdf = norm.cdf(x, mu, sigma)
        lower = norm.ppf(lower_threshold, mu, sigma)
        upper = norm.ppf(upper_threshold, mu, sigma)
        
    elif method == 'KDE':
        kernel = gaussian_kde(Data)
        pdf = kernel.evaluate(x)
        cdf = np.array([kernel.integrate_box_1d(x[0], xi) for xi in x])
        lower = np.percentile(cdf, lower_threshold*100)
        upper = np.percentile(cdf, upper_threshold*100)

    else:
        raise ValueError("Unknown method: %s" % method)

    return x, pdf, cdf, lower, upper
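A hedged usage sketch for the 'Normal' branch (the 'Robust_Student_t' branch also needs the project-specific uvtfit helper, which is not shown here):

import numpy as np
from scipy.stats import norm, t
from scipy.stats import gaussian_kde

np.random.seed(0)
sample = np.random.normal(loc=1.0, scale=2.0, size=500)
x, pdf, cdf, lower, upper = GeneratePDF(sample, method='Normal')
print(lower, upper)   # roughly the 15th and 85th percentiles of the fitted normal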
Example #3
def studentT_curve(ax=None, linewidth=4, color='k', mean=0, SD=1, 
                   df=20,
                   facecolor='gray',
                   xlabel='standardized units',
                   ylabel='% per standardized unit', 
                   alpha=0.5,
                   **plot_opts):

   if ax is None:
      fig = plt.gcf()
      ax = fig.add_subplot(111)
   
   plot_opts['linewidth'] = linewidth
   plot_opts['color'] = color

   Z = np.linspace(-4,4,101)
   X = mean+SD*Z
   Y = tdist.pdf(Z, df) / SD
   ax.plot(X, Y, **plot_opts)
   ax.fill_between(X, 0*X, Y, alpha=alpha, facecolor=facecolor)
   if xlabel:
      ax.set_xlabel(xlabel, fontsize=20)
   if ylabel:
      ax.set_ylabel(ylabel, fontsize=20)
   ax.set_ylim([0,0.45/SD])
   ax.set_xlim([X.min(),X.max()])
   return ax
Example #4
  def predict_proba(self, X):
    N, D = X.shape
    # P = np.zeros(N)
    # for n in xrange(N):
    #   x = X[n]

    #   pyx = []
    #   for c in (0, 1):
    #     pycx = self.pyy[c]
    #     for d in xrange(D):
    #       tinfo_cd = self.tinfo[c][d]
    #       pdf_d = t.pdf(x[d], df=tinfo_cd['df'], loc=tinfo_cd['center'], scale=tinfo_cd['scale'])
    #       pycx *= pdf_d
    #     pyx.append(pycx)

    #   py1x = pyx[1] / (pyx[0] + pyx[1])
    #   # print "p(y=1|x):", py1x
    #   P[n] = py1x

    posteriors = np.zeros((N, 2))
    for c in (0, 1):
      probability_matrix = np.zeros((N, D))
      for d in range(D):
        tinfo_cd = self.tinfo[c][d]
        pdf_d = t.pdf(X[:,d], df=tinfo_cd['df'], loc=tinfo_cd['center'], scale=tinfo_cd['scale'])
        probability_matrix[:,d] = pdf_d
      posteriors_c = np.prod(probability_matrix, axis=1)*self.pyy[c]
      posteriors[:,c] = posteriors_c
    P = posteriors[:,1] / np.sum(posteriors, axis=1)
    return P
Example #5
 def get_predprobs(self, datum):
     """
     Predictive distribution of NIG is a T distribution.
     """
     muT = self.params.post_params['mu']
     nuT = self.params.post_params['nu']
     alphaT = self.params.post_params['alpha']
     betaT = self.params.post_params['beta']
     return t.pdf(datum, 2*alphaT, muT, betaT*(1+nuT) / (alphaT*nuT))
Example #6
def plot_dist(n, q1_arr):  # n = sample size, q1_arr = array of q1 values
    
    xx=np.linspace(-3,3,num=100)  # x-axis values for the plot

    kde_model=gaussian_kde(q1_arr)  # kernel density estimate of the distribution of the t values
    
    t_dist = t.pdf(xx,df=n-2)  # t distribution with the same degrees of freedom
    
    plt.plot(xx, kde_model(xx), 'g-')  # plot of the distribution of the t values
    plt.plot(xx, t_dist,'b:')  # t distribution
    plt.ylabel('Kernel Density')  # y-axis label
    plt.title('n = {0}'.format(n))  # title
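A quick sanity check of plot_dist (hypothetical inputs): feeding it t-distributed draws with the matching degrees of freedom should make the kernel-density curve and the reference t density nearly coincide.

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t, gaussian_kde

n = 30
q1_arr = t.rvs(df=n - 2, size=5000)   # simulated t statistics
plot_dist(n, q1_arr)
plt.show()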
Example #7
 def parametricES_student(self,
                          dof: int,
                          confidenceLevel: float = 0.95) -> {}:
     es_t = {}
     for name in self.returns.columns:
         mu = np.mean(self.returns[name])
         std = np.std(self.returns[name])
         xanu = t.ppf(1 - confidenceLevel, dof)
         es_t[name] = round(
             (-1 / (1 - confidenceLevel)) * (1 - dof)**(-1) *
             (dof - 2 + xanu**2) * t.pdf(xanu, dof) * std - mu, 3)
     return es_t
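As a sanity check on the expected-shortfall formula used above (a standalone sketch, not part of the class): for a large number of degrees of freedom it should approach the Gaussian expression (1/alpha) * norm.pdf(norm.ppf(alpha)) * std - mu.

from scipy.stats import norm, t

alpha, dof, mu, std = 0.05, 200, 0.0, 1.0
xanu = t.ppf(alpha, dof)
es_t = (-1 / alpha) * (1 - dof) ** (-1) * (dof - 2 + xanu ** 2) * t.pdf(xanu, dof) * std - mu
es_n = (1 / alpha) * norm.pdf(norm.ppf(alpha)) * std - mu
print(es_t, es_n)   # the two values agree to roughly two decimal places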
Example #8
    def confidence(self, i):
        """ Return the 95 per cent confidence interval for a fitted parameter
        https://stats.stackexchange.com/questions/72047/when-fitting-a-curve-how-do-i-calculate-the-95-confidence-interval-for-my-fitt
        [BestFit(Pi) +/- t(95%,DF)*SE(Pi)

        NOTES:
            TODO(arl): make this a user defined interval
        """
        ci = constants.CONFIDENCE_INTERVAL / 100.0
        conf = t_distrb.ppf(0.5 + ci / 2.0, self.DoF) * self.SE(i)  # two-sided critical value t(95%, DoF)
        return (self.fit_params[i].value - conf,
                self.fit_params[i].value + conf)
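The value multiplying the standard error is the two-sided critical value t(95%, DoF); for reference (df=10 is just an illustrative value):

from scipy.stats import t as t_distrb
print(t_distrb.ppf(0.975, 10))   # about 2.228, the multiplier for a 95% interval with 10 degrees of freedom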
Example #9
def chi2_distribution():
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    df = 10
    x=np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x,df))
    
    #simulate the chi2 distribution
    y = []
    n=10
    for i in range(1000):
        chi2r=0.0
        r = norm.rvs(size=n)
        for j in range(n):
            chi2r=chi2r+r[j]**2
        y.append(chi2r)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()
    
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    df = 10
    x=np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x,df))
    
    #simulate the t-distribution
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx/np.sqrt(ry/df)
        y.append(rt)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()
    
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))
    
    #simulate the F-distribution
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx/dfn)/(ry/dfm)  # F statistic: ratio of scaled chi-square variates
        y.append(rf)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()
Example #10
def compute_T_Pvalue(betas, stds_beta, mask_file, null_hyp=True):
    '''
    Compute Tvalues statistic and Pvalue based upon estimates
    and their standard deviation
    beta and std_beta for all voxels
    beta: shape (nb_vox, 1)
    std: shape (1)
    Assume null hypothesis if null_hyp is True
    '''
    from pyhrf.ndarray import xndarray

    import sys
    sys.path.append("/home/i2bm/BrainVisa/source/pyhrf/pyhrf-free/trunk/script/WIP/Scripts_IRMf_Adultes_Solv/Scripts_divers_utiles/Scripts_utiles/")
    from Functions_fit import Permutation_test, stat_mean, stat_Tvalue, stat_Wilcoxon

    mask = xndarray.load(mask_file).data #to save P and Tval on a map

    BvalC = xndarray(betas, axes_names=['sagittal', 'coronal', 'axial'])
    Betasval = BvalC.flatten(mask, axes=['sagittal', 'coronal', 'axial'], new_axis='position').data

    Stdsval = stds_beta

    Tval = xndarray(Betasval/Stdsval, axes_names=['position']).data

    nb_vox = Betasval.shape[0]
    nb_reg = betas.shape[1]
    dof = nb_vox - nb_reg  # degrees of freedom for the Student distribution
    assert dof>0

    Probas=np.zeros(Betasval.shape)
    for i in range(nb_vox):
        if null_hyp:
            # Student distribution
            from scipy.stats import t
            fmix = lambda x: t.pdf(x, dof)
        else:
            fmix = lambda t:  1/np.sqrt(2*np.pi*Stdsval[i]**2)*np.exp(- (t - Betasval[i])**2 / (2*Stdsval[i]**2) )
        Probas[i] = quad(fmix, Tval[i], float('inf'))[0]

    Tvalues_ = xndarray(Tval, axes_names=['position'])
    Pvalues_ = xndarray(Probas, axes_names=['position'])
    Tvalues = Tvalues_.expand(mask, 'position', ['sagittal','coronal','axial'])
    Pvalues = Pvalues_.expand(mask, 'position', ['sagittal','coronal','axial'])

    #Computation of Pvalue using permutations
    #not possible to do this actually...it was used for group level stats
    #Pvalue_t = np.zeros(Betasval.shape)
    #for i in xrange(nb_vox):
        #Pvalue_t[i] = Permutation_test(Betasval[i], n_permutations=10000, \
                    #stat = stat_Tvalue, two_tailed=False, plot_histo=False)

    return Tvalues.data, Pvalues.data
def demo_Student_t_pdf():
    x = np.linspace(-6, 6, 1000)
    plt.figure()
    colors = list('brkmcyg') * 3
    for i, dof in enumerate([1, 2, 5, 10, 20, 50, 100]):
        plt.plot(x,
                 t.pdf(x, dof),
                 colors[i] + '-',
                 lw=1,
                 alpha=0.7,
                 label='dof = ' + str(dof))
    plt.legend()
    plt.title("Student's t distribution", fontsize=15)
Example #12
 def _p_value(self, t_value):
     """
     TODO check if calculation is correct with tvalue = paramter / std and pavalue with df as totalparameter and not
     as total observations - totalparameter
     :param t_value:
     :return: p-value of parameter
     """
     from scipy.stats import t as t_dist
     df = (self._ts_matrix.size) - (self._get_total_parameter() *
                                    len(self._wa_matrices))
     # TODO check calculation of p-values with degrees of freedom
     # p_value = tdist.pdf(abs(tvalue), self._total_parameter())
      return 2 * t_dist.sf(abs(t_value), df)  # two-sided p-value: upper-tail probability, doubled
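A standalone illustration of the same two-sided p-value calculation with hypothetical numbers:

from scipy.stats import t as t_dist

t_value, df = 2.1, 28
print(2 * t_dist.sf(abs(t_value), df))   # about 0.045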
    def test_multidim_student_t(self):
        from scipy.stats import t
        from cde.utils.distribution import multidim_t_pdf
        mu = 5 * np.ones(3)
        sigma = 3 * np.ones(3)
        dof = 6

        x = np.random.uniform(-10, 10, size=(100, 3))
        p1 = np.prod(t.pdf(x, loc=5, scale=3, df=dof), axis=-1)

        p2 = multidim_t_pdf(x, mu, sigma, dof)

        self.assertLessEqual(np.sum((p1 - p2)**2), 0.0001)
def plot_student(nu_values, x_range=[-8, 8], title=""):
    x = np.linspace(x_range[0], x_range[1], 101)
    plt.figure(figsize=(9, 7))
    for i, nu in enumerate(nu_values):
        #plt.plot(x, t.pdf(x, df=1, loc=1, scale=1), 'g-', label='nu=1')
        #plt.plot(x, t.pdf(x, df=5, loc=1, scale=1), 'r-', label='nu=5')
        plt.plot(x,
                 t.pdf(x, df=nu, loc=1, scale=1),
                 colors[i] + '-',
                 label='nu=%i' % (int(nu)))  # 'df' is parameter nu
    plt.plot(x, norm.pdf(x, loc=1, scale=1), 'k-', label='normal')
    plt.legend(fontsize=15)
    plt.title(title, fontsize=16)
Example #15
    def parametric_Student_Portfolio(self, dof: int,
                                     confidenceLevel: float = 0.95) -> {}:
        riskMeasures = {}

        # portfolio-level return series (assumed to be a single series here)
        mu = np.mean(self.portfolioReturns)
        std = np.std(self.portfolioReturns)
        xanu = t.ppf(1 - confidenceLevel, dof)
        riskMeasures['Portfolio VaR'] = round(
            mu + std * t.ppf(confidenceLevel, dof) * np.sqrt((dof - 2) / dof),
            3)
        riskMeasures['Portfolio ES'] = round(
            (-1 / (1 - confidenceLevel)) * (1 - dof)**(-1) *
            (dof - 2 + xanu**2) * t.pdf(xanu, dof) * std - mu, 3)

        return riskMeasures
Example #16
        def predictive_postetior_tointegrate(mu, tau, x):
            M1 = r + self.discount_factor * x
            M2 = r**2 + 2 * self.discount_factor * r * x + self.discount_factor**2 * x**2
            new_mean = (lamb * mean + M1) / (lamb + 1)
            new_lambda = lamb + 1
            new_alpha = alpha + 0.5
            new_beta = beta + 0.5 * (M2 -
                                     M1**2) + (lamb *
                                               (M1 - mean)**2) / (2 *
                                                                  (lamb + 1))

            return normal_gamma_pdf(mu, tau, new_mean, new_lambda, new_alpha, new_beta) * \
                t.pdf(x, 2 * alpha2, loc=mean2, scale=np.sqrt(beta2/alpha2 * (1 + 1./lamb2)))
Example #17
def tskew_pdf(x, df, loc, scale, skew):    
    """
    Density function of the tskew distribution 
    Based on the formula in Giot and Laurent (JAE 2003 pp. 650)
    - x = the value to evaluate
    - df: degrees of freedom (>1)
    - location: mean of the distribution
    - scale: standard deviation of the distribution
    - skew: skewness parameter (>0, if ==1: no skew, <1: left skew, >1 right)
    
    NB: I had to parametrize the formula differently to get consistent results
    
    """
    cons = (2/(skew + (1/skew)))/scale
    norm_x = x-(loc/scale)
    if x < loc/scale :
        pdf = cons*t.pdf(skew*norm_x, df, loc=0, scale=1) # Symmetric t pdf
    elif x >= loc/scale:
        pdf = cons*t.pdf(norm_x/skew, df, loc=0, scale=1) # Symmetric t pdf
    else:
        raise ValueError('Incorrect parameters')

    return(pdf)
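A quick consistency check: with skew=1, loc=0 and scale=1 the formula should collapse to the symmetric Student t density (illustrative values only):

from scipy.stats import t

print(tskew_pdf(0.5, df=4, loc=0, scale=1, skew=1))
print(t.pdf(0.5, 4))   # same value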
    def update_from_experts(self, state, data):

        # compute return
        G_Q, G_U = data[0], data[1]
        epsilon = self.get_epsilon(state)
        G = (1.0 - epsilon) * G_Q + epsilon * G_U

        # update mu-hat and sigma^2-hat
        self.stat.update(G)
        mu, sigma2, t = self.stat.mean, self.stat.var, self.stat.count

        # update a_t and b_t
        a = self.a0 + t / 2
        b = self.b0 + t / 2 * sigma2 + t / 2 * (
            self.tau0 / (self.tau0 + t)) * (mu - self.mu0) * (mu - self.mu0)

        # compute e_t
        scale = (b / a)**0.5
        e_u = tdist.pdf(G, df=2.0 * a, loc=G_U, scale=scale)
        e_q = tdist.pdf(G, df=2.0 * a, loc=G_Q, scale=scale)

        # update posterior
        self.post.update(e_u, e_q)
Example #19
    def gradient_scale(y: np.ndarray, location: np.ndarray, scale: np.ndarray,
                       nu: np.ndarray, tau: np.ndarray, weights: np.ndarray):
        """Calculates Gradient of scale parameter.

        """
        z = np.where(nu != 0, (((y / location)**nu - 1) / (nu * scale)),
                     np.log(y / location) / scale)
        w = (tau + 1) / (tau + z**2)
        h = t.pdf(1 / (scale * np.abs(nu)), df=tau) / t.cdf(
            1 / (scale * np.abs(nu)), df=tau)
        grad = (w * (z**2) - 1) / scale + h / (scale**2 * np.abs(nu))
        grad = stabilize_derivative(grad, BCT.stabilize)
        grad = grad * (-1) * weights
        return grad
Example #20
 def predict(self, x1, x2):
     p1 = t.cdf((x1 - self.mu) / sqrt((self.beta * (self.n_null + 1)) / (self.alpha * self.n_null)), 2 * self.alpha)
     p2 = t.cdf((x2 - self.mu) / sqrt((self.beta * (self.n_null + 1)) / (self.alpha * self.n_null)), 2 * self.alpha)
     pges = round(100 * abs(p1 - p2), 3)
     #x = np.linspace(t.ppf(0.001, 2*self.alpha,loc=self.mu),t.ppf(0.999, 2*self.alpha,loc=self.mu), 100)
     x = np.linspace(self.mu-(t.ppf(0.99, 2*self.alpha,0))*sqrt(self.beta*(self.n_null+1)/(self.n_null*self.alpha)),self.mu+(t.ppf(0.99, 2*self.alpha,0))*sqrt(self.beta*(self.n_null+1)/(self.n_null*self.alpha)), 100)
     fig, ax = plt.subplots(1, 1)
     fillrange=np.linspace(x1,x2,100)
     ax.fill_between(fillrange, t.pdf(fillrange, df=2*self.alpha,loc=self.mu, scale=sqrt(self.beta*(self.n_null+1)/(self.n_null*self.alpha))),y2=0)
     ax.plot(x, t.pdf(x, df=2*self.alpha,loc=self.mu, scale=sqrt(self.beta*(self.n_null+1)/(self.n_null*self.alpha))))
     prob_statement=('The probability to obtain an outcome between '+ str(x1) + ' and '+ str(x2) + ' with the current knowledge is: '+ str(pges) +
           '%')
     if not os.path.isdir('static'):
         os.mkdir('static')
     else:
         # Remove old plot files
         for filename in glob.glob(os.path.join('static', '*.png')):
             os.remove(filename)
     # Use the time since Jan 1, 1970 in the filename in order to make
     # a unique filename that the browser has not cached
     plotfile = os.path.join('static', str(time.time()) + '.png')
     plt.savefig(plotfile)
     return plotfile,prob_statement
Example #21
 def marginal(sample_mu, sample_prec, n, cdf=False):
     sample_var = p2s(sample_prec)
     n = float(n)
     mu = (prior.precision * prior.mu + n * sample_mu) / (prior.precision +
                                                          n)
     precision = prior.precision + n
     alpha = prior.alpha + n / 2.
     beta = (prior.beta + 0.5 * (n * sample_var) +
             ((prior.precision * n * ((sample_mu - prior.mu)**2)) /
              (2 * (prior.precision + n))))
     tmu = mu
     tsigma = beta / (alpha * precision)
     if cdf:
         return lambda x: t.cdf(x, 2 * alpha, tmu, tsigma)
     return lambda x: t.pdf(x, 2 * alpha, tmu, tsigma)
Example #22
    def plot_pdf(self, arg=np.linspace(-2, 2, 100)):
        """Plot probability density function.

        Parameters
        ----------
        arg : array
            Grid of point to evaluate PDF at

        """
        scale = (self.eta/(self.eta-2))**.5
        plt.plot(arg, t.pdf(arg, self.eta, scale=1/scale),
                 label='t distribution')
        plt.plot(arg, self.pdf(arg), label='skew-t distribution')
        plt.legend()
        plt.show()
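The 1/scale factor above standardizes the reference t density to unit variance, since a t distribution with eta degrees of freedom and scale s has variance s**2 * eta / (eta - 2); a quick check with an illustrative eta:

from scipy.stats import t

eta = 6
scale = (eta / (eta - 2)) ** .5
print(t.var(eta, scale=1 / scale))   # 1.0, up to floating-point rounding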
def calc_CVar(Df, alpha=0.05, dist='n'):
    rtn = Df / Df.shift(1) - 1
    if dist == 'n':
        mu, sig = norm.fit(rtn.dropna().values)
        mu = mu * 252
        sig = sig * 252**(0.5)
        CVar = alpha**(-1.) * norm.pdf(norm.ppf(alpha)) * sig - mu
    if dist == 't':
        nu, mu, sig = t.fit(rtn.dropna().values)
        mu = mu * 252
        sig = sig * 252**(0.5)
        xanu = t.ppf(alpha, nu)
        CVar = -1. / alpha * (1 - nu)**(-1.) * (nu - 2 + xanu**2.) * t.pdf(
            xanu, nu) * sig - mu
    return CVar
from scipy.stats import t

def pdf(x, dof):
    '''
    Parameters
    ----------
    x: float
        point at which to evaluate the density
    dof: int
        degrees of freedom, constant within each integration test case
    Returns
    -------
    pdf: float
        probability density at x
    '''
    # scipy.stats provides the Student t distribution and its pdf
    return t.pdf(x, dof)
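The docstring above mentions integration test cases; a minimal check that the density integrates to 1 (illustrative degrees of freedom):

import numpy as np
from scipy.integrate import quad

print(quad(pdf, -np.inf, np.inf, args=(5,))[0])   # approximately 1.0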
def tlocscale(x, mu, scale2, nu):
    """
    shifted and scaled student-t pdf

    Arguments:
        x {list} -- nodes to evaluate density at
        mu {float} -- shift
        scale2 {float} -- scale
        nu {int} -- degrees of freedom

    Returns:
        list -- evaluations of pdf
    """
    scale = np.sqrt(scale2)
    return student_t.pdf((x - mu) / scale, nu) / scale
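This is equivalent to passing loc and scale to scipy directly; a quick check with illustrative parameters:

import numpy as np
from scipy.stats import t as student_t

x = np.linspace(-3, 3, 5)
print(tlocscale(x, mu=1.0, scale2=4.0, nu=3))
print(student_t.pdf(x, 3, loc=1.0, scale=2.0))   # same values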
    def ei_given_sample(self, sample, parameters_kernel, candidate_point):
        """
        Correct
        See p. 1140. We compute (8)
        :param sample:
        :param parameters_kernel:
        :param candidate_point:
        :return:
        """
        M = np.min(sample)
        n = len(sample)
        mc, c, chol, Z, bc = self.compute_mc_given_sample(
            sample, candidate_point, parameters_kernel)
        historical_points = self.gp.data['points']

        candidate_vector = np.zeros(
            (len(self.domain_xe), historical_points.shape[1]))
        for j in range(len(self.domain_xe)):
            point = np.concatenate(
                (candidate_point, np.array(self.domain_xe[j])))
            candidate_vector[j, :] = point
        cov_new = self.gp.evaluate_cov(candidate_vector, parameters_kernel)
        weights_matrix = self.weights.reshape((len(self.weights), 1))

        Rc = np.dot(weights_matrix.transpose(), np.dot(cov_new,
                                                       weights_matrix))
        Rc -= np.dot(c, cho_solve(chol, c.transpose()))
        one = np.ones((2 * n, 1))
        Rc += (1 - np.dot(c, cho_solve(chol, one))) ** 2 / \
              (np.dot(one.transpose(), cho_solve(chol, one)))

        Zc = Z.reshape((len(Z), 1))
        sigma_c = np.dot(Zc.transpose(), cho_solve(chol, Zc))
        sigma_c -= (bc**2) * np.dot(one.transpose(), cho_solve(chol, one))
        sigma_c /= (2.0 * n - 1)

        difference = M - mc
        sd = 1.0 / np.sqrt(Rc * sigma_c)
        component_1 = (M - mc) * t.cdf(difference * sd, 2 * n - 1)

        component_2 = t.pdf(difference * sd, 2 * n - 1)
        component_2 *= 1.0 / (2.0 * (n - 1))
        component_2 *= (2.0 * n - 1) * np.sqrt(Rc * sigma_c) + (difference**
                                                                2) * sd

        result = component_1 + component_2

        return result[0, 0]
Example #27
    def plot_pdf(self, arg=np.linspace(-2, 2, 100)):
        """Plot probability density function.

        Parameters
        ----------
        arg : array
            Grid of point to evaluate PDF at

        """
        scale = (self.eta / (self.eta - 2))**.5
        plt.plot(arg,
                 t.pdf(arg, self.eta, scale=1 / scale),
                 label='t distribution')
        plt.plot(arg, self.pdf(arg), label='skew-t distribution')
        plt.legend()
        plt.show()
Example #28
    def plot_dsn(self, input_a=None, input_b=None):
        """Plots our p-value in a t-distribution"""
        if input_a:
            N = len(input_a)
        else:
            N = len(self.a)

        test_stat, df = self.testStat(input_a, input_b)
        fig, ax = plt.subplots(1, 1)
        x = np.linspace(t.ppf(0.01, df), t.ppf(0.99, df), 100)
        ax.plot(x, t.pdf(x, df), 'r-', lw=5, alpha=0.6, label='t pdf')
        ax.set_xlabel('Test Statistic Values')
        ax.set_ylabel('Probability')
        ax.set_title('T-Distribution with {}. D.o.F.'.format(df))
        plt.axvline(test_stat, lw=7)
        plt.show()
Example #29
File: 38.py Project: XNYu/Statistic
def sampling_distribution():
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    df = 10
    x=np.linspace(t.ppf(0.01, df), t.ppf(0.99, df), 100)
    ax.plot(x, t.pdf(x, df))
    
    #simulate the sampling distribution
    y = []
    for i in range(1000):
        r = norm.rvs(loc=5, scale=2, size=df+1)
        rt =(np.mean(r)-5)/np.sqrt(np.var(r)/df)
        y.append(rt)

    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('sampling_distribution.png')
Example #30
 def marginal(samples, cdf=False):
     sample_mu, s = mean(samples), std(samples)**2.
     n = float(len(samples))
     mu = (prior_precision * prior_mean +
           n * sample_mu) / (prior_precision + n)
     precision = prior_precision + n
     alpha = palpha + n / 2.
     beta = (pbeta + 0.5 * (sum(
         (samples - sample_mu)**2) + ((prior_precision * n *
                                       ((sample_mu - prior_mean)**2)) /
                                      (2 * (prior_precision + n)))))
     tmu = mu
     tsigma = beta / (alpha * precision)
     if cdf:
         return lambda x: t.cdf(x, 2 * alpha, tmu, tsigma**.5)
     return lambda x: t.pdf(x, 2 * alpha, tmu, tsigma**.5)
Example #31
 def pred_prob(self, data_list, x):
     """Compute UPM predictive probabilities.
     """
     #x = data_list[-1]; print("data_list :",x, data_list)   
     data_list = data_list[:-1] # The last data-point included manually on the following step.
     data_length = len(data_list);
     beta_list = [self.beta0]
     #if data_length > 1:
     #print("**", self.gamma_params)
     for ind in range(data_length):
         r_ind = data_length - ind - 1
         beta = self.beta0 + 0.5*sum((np.array(data_list[r_ind:]) - np.average(np.array(data_list[r_ind:])))**2) \
         + (ind + 1)*self.gamma_params[0]/(self.gamma_params[0] + (ind + 1)) * 0.5 * \
         (np.average(np.array(data_list[r_ind:])) - self.mu_params[0])**2
         beta_list.append(beta)
         #print("1 :",0.5*sum((np.array(data_list[r_ind:]) - np.average(np.array(data_list[r_ind:])))**2))
         #print("2 :",(ind + 1)*self.gamma_params[ind]/(self.gamma_params[ind] + (ind + 1)) * 0.5)
         #print("3 :",(np.average(np.array(data_list[r_ind:])) - self.mu_params[0])**2)
         #print(data_list[r_ind:])
         #print("* ", (np.array(data_list[r_ind:]) - np.average(np.array(data_list[r_ind:]))))
         #print("** ", sum((np.array(data_list[r_ind:]) - np.average(np.array(data_list[r_ind:])))**2))
     #print(self.num)
     #print(np.array(self.alpha_params))
     #print("beta_list : ", np.array(beta_list))
     #print()
     d = lambda x, df, mu, std : t.pdf(x, df, mu, std) # T distribution for conservative modeling
     d_normal = lambda x, mu, std: norm.pdf(x, mu, std) # Normal distribution for general purpose modeling
     
     ArrayForReturn_t = np.array([d(x, 2*self.alpha_params[i], self.mu_params[i], 
                                  np.sqrt(beta_list[i]*(self.gamma_params[i]+1)/(self.alpha_params[i]*self.gamma_params[i]))) \
                                for i in range(data_length+1)])
     
     ArrayForReturn_norm = np.array([d_normal(x, self.mu_params[i],
                                               beta_list[i]*(self.gamma_params[i]+1)/(self.alpha_params[i]*self.gamma_params[i]))\
                                       for i in range(data_length+1)])
     #print("{}, Expectd Mu : {}, Expected STD : {}".format(self.num ,self.mu_params[-1]
     #    ,math.sqrt(1/(self.alpha_params[-1]/beta_list[-1])) ))
     #print("   STE Mu : {}, STE Std {}".format(math.sqrt(beta_list[-1]/(self.gamma_params[-1]*(self.alpha_params[-1]+1))),
     #                                         math.sqrt(beta_list[-1]*math.sqrt(math.sqrt(1.0/self.alpha_params[-1])))))
     #print("E(std) : ", np.sqrt(np.array(beta_list)/self.alpha_params))
     #print("E(mu) : ", self.mu_params)
     #print(np.array(beta_list))
     #print(self.alpha_params)
     self.num += 1
     
     #print()
     return ArrayForReturn_t
Example #32
def plot_apprxn(S, batch, S_1, batch_1, vol_sup_node, no_tours, F_org_except_sup):
    bmap = brewer2mpl.get_map('Set2', 'qualitative', 7)
    colors = bmap.mpl_colors    
    params = {
       'axes.labelsize': 10,
   'font.size': 8,
       'legend.fontsize': 10,
       'xtick.labelsize': 10,
       'ytick.labelsize': 10,
       'text.usetex': False,
       'figure.figsize': [6, 5]
       }    
    plt.rcParams.update(params)
    ax=plt.axes(frameon=0)
    plt.grid()    
    ax.set_xlabel('$\hat{\mu}(\mathcal{D}_m(S_n))$')
    
    S=np.array(S)*(vol_sup_node/(2*batch))
    plt.hist(S, density=True, alpha=0.5, color=colors[1],label='Empirical distribution')
          
    S_1=np.array(S_1)*(vol_sup_node/(2*batch_1))
    m_0=0;mu_0=0;nu_0=0;sigma_0=1
    n_stt= len(S_1)#31 #no_tours # "n" for student-t 
    nu_tild=nu_0+n_stt
    S1_avg=np.mean(S_1)
    S1_sum=sum(S_1)
    mu_tild=(m_0*mu_0+n_stt*S1_avg)/(m_0+n_stt)

    temp_second_term=sum((S_1-S1_avg)**2)
    temp_third_term=(m_0*n_stt*(S1_avg-mu_0)**2)/(m_0+n_stt)
    sigma_tild=np.sqrt((nu_0*(sigma_0)**2+temp_second_term+temp_third_term)/((nu_0+n_stt)*(m_0+n_stt)))
    
    samples_temp = np.linspace(t.ppf(10**(-15), nu_tild, loc=mu_tild, scale=sigma_tild),t.ppf((1-10**(-15)), nu_tild, loc=mu_tild, scale=sigma_tild), 50**4)
#    samples_temp = np.linspace(4*10**9,8*10**9, 50**4)
    plt.plot(samples_temp, t.pdf(samples_temp, nu_tild, loc=mu_tild, scale=sigma_tild), color=colors[0], linewidth=2,linestyle='-',label='Approximate posterior')    
    plt.axvline(x=F_org_except_sup, ymin=0, color="blue",alpha=0.75, label='True value', linewidth=2)         
    
    legend=plt.legend(loc='best')
    frame = legend.get_frame()
    frame.set_facecolor('0.9')
    frame.set_edgecolor('0.75')
    
    plt.title('Friendster Network')    
    plt.savefig('plot_HypRW.pdf')
    print "Saved the figure for Friendster graph in 'plot_HypRW.pdf'" 
    
    print "Normalized percentage error in true value: ", 100*(F_org_except_sup-S1_avg)/F_org_except_sup   
Example #33
    def gradient_nu(y: np.ndarray, location: np.ndarray, scale: np.ndarray,
                    nu: np.ndarray, tau: np.ndarray, weights: np.ndarray):
        """Calculates Gradient of nu parameter.

        """
        z = np.where(nu != 0, (((y / location)**nu - 1) / (nu * scale)),
                     np.log(y / location) / scale)
        w = (tau + 1) / (tau + z**2)
        h = t.pdf(1 / (scale * np.abs(nu)), df=tau) / t.cdf(
            1 / (scale * np.abs(nu)), df=tau)
        grad = ((w * z**2) / nu) - np.log(y / location) * (w * z**2 +
                                                           ((w * z) /
                                                            (scale * nu)) - 1)
        grad = grad + np.sign(nu) * h / (scale * nu**2)
        grad = stabilize_derivative(grad, BCT.stabilize)
        grad = grad * (-1) * weights
        return grad
def naiveBayes(xtrain, ytrain, xtest):
    ntrain = xtrain.shape[0]
    ntest = xtest.shape[0]
    log_prob = pd.DataFrame(0, index = np.arange(ntest), columns = [0, 1])
    for y in [0, 1]:
        xtrain_y = xtrain[ytrain[0] == y]
        n_y = xtrain_y.shape[0]
        prob_y = (1 + n_y) / (ntrain + 2)
        log_condi_prob = pd.DataFrame(0, index = np.arange(ntest), columns = range(15))
        for d in range(15):
            [df, loc, scale] = parameters(xtrain_y[d])
            log_condi_prob[d] = np.log(t.pdf(xtest[d], df, loc, scale))
        log_prob[y] = log_condi_prob.sum(axis = 1) + np.log(prob_y)
    prob = log_prob.apply(lambda x: np.exp(x), axis = 1)
    prob = prob.apply(lambda x: pd.Series([x[0]/x.sum(), x[1]/x.sum()]), axis = 1)
    pred = prob.apply(lambda x: 0 if x[0] > x[1] else 1, axis = 1)
    return(prob, pred)
Example #35
    def resid_density_plot(self):
        n = self.n
        p = self.p
        n0 = self.n0
        studentized_residuals = self.residual_analysis['student_resid']
        ll = np.linspace(studentized_residuals.min(),
                         studentized_residuals.max(), n0)
        t_density = t.pdf(ll, df=n - p)

        plt.figure(figsize=(10, 5))
        sns.histplot(studentized_residuals,
                     stat='density',
                     kde=True,
                     label='kernel estimator')
        plt.plot(ll, t_density, 'r--', linewidth=2, label='t_density')
        plt.title("Histogram and t-Density for Studentized Residuals")
        plt.legend(loc='upper left')
def t_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x, df))

    # simulate the t-distribution
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx / np.sqrt(ry / df)
        y.append(rt)

    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('t_distribution.png')
Example #37
File: 33.py Project: XNYu/Statistic
def t_distribution():
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    df = 10
    x=np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x,df))
    
    #simulate the t-distribution
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx/np.sqrt(ry/df)
        y.append(rt)

    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('t_distribution.png')
Example #38
 def _post_fit(self, optimization_res, coeff_names, N, verbose=1):
     self.convergence = optimization_res['success']
     self.coeff_ = optimization_res['x']
     self.stderr = np.sqrt(np.diag(optimization_res['hess_inv']))
     self.zvalues = self.coeff_ / self.stderr
      self.pvalues = 2 * t.cdf(-np.abs(self.zvalues),
                               df=N)  # two-tailed test
     self.loglikelihood = -optimization_res['fun']
     self.coeff_names = coeff_names
     self.total_iter = optimization_res['nit']
     if self.convergence and verbose > 0:
         print("Estimation succesfully completed after {} iterations. "
               "Use .summary() to see the estimated values".format(
                   self.total_iter))
     if not self.convergence and verbose > 0:
         print("**** The optimization did not converge after {} "
               "iterations. ****".format(self.total_iter))
         print("Message: " + optimization_res['message'])
Example #39
def get_dists(lr, lb):
    kde_dict = dict()
    t_dict = dict()
    norm_dict = dict()
    for i in range(3):
        temp = lr[lb == i]
        xr = np.linspace(lr.min(), lr.max(), 1000)

        kde = gaussian_kde(temp)
        y = kde(xr)
        kde_dict[str(i)] = y

        t_pdf = t.pdf(xr, *t.fit(temp))
        n_pdf = norm.pdf(xr, *norm.fit(temp))
        t_dict[str(i)] = t_pdf
        norm_dict[str(i)] = n_pdf

    return xr, kde_dict, t_dict, norm_dict
Example #40
def estimate_responsiveness(plan_df):
    elec_results = plan_df[['2008', '2012', '2016']].values
    state_year_results = elec_results.mean(axis=0)
    state_vote_t = t(df=2,
                     loc=state_year_results.mean(),
                     scale=state_year_results.std(ddof=1))
    truncation_factor = 1 / (state_vote_t.cdf(1) - state_vote_t.cdf(0))

    district_share = district_votes_given_state_vote(plan_df)
    state_share = district_share.mean(axis=0)

    district_std = elec_results.std(axis=1, ddof=1)
    district_std = np.tile(district_std, (len(state_share), 1)).T

    vote_seat_slope = np.nan_to_num(
        t.pdf(.5, df=2, loc=district_share, scale=district_std)).mean(axis=0)
    return simps(
        vote_seat_slope * state_vote_t.pdf(state_share) * truncation_factor,
        state_share)
Example #41
    def _post_fit(self, optimization_res, coeff_names, sample_size, verbose=1):
        self.convergence = optimization_res['success']
        self.coeff_ = optimization_res['x']
        self.stderr = np.sqrt(np.diag(optimization_res['hess_inv']))
        self.zvalues = self.coeff_ / self.stderr
        self.pvalues = 2 * t.cdf(-np.abs(self.zvalues), df=sample_size)  # two-tailed test
        self.loglikelihood = -optimization_res['fun']
        self.coeff_names = coeff_names
        self.total_iter = optimization_res['nit']
        self.estim_time_sec = time() - self._fit_start_time
        self.sample_size = sample_size
        self.aic = 2 * len(self.coeff_) - 2 * self.loglikelihood
        self.bic = np.log(sample_size) * len(
            self.coeff_) - 2 * self.loglikelihood

        if not self.convergence and verbose > 0:
            print("**** The optimization did not converge after {} "
                  "iterations. ****".format(self.total_iter))
            print("Message: " + optimization_res['message'])
Example #42
def student(x, normf, mu, sig, skew, nu):
    # skew-t density, nonzero only on the window 2.0 < y < 3.05; list() materializes the result under Python 3
    return list(map(lambda y: 2.*t.pdf((y-mu)/sig, nu)*t.cdf(skew*((y-mu)/sig)*np.sqrt((nu+1.0)/(nu+(y-mu)*(y-mu)/sig/sig)),(nu+1.0))/sig*normf if ( (y <3.05) & (y>2.0) ) else 0., x))
Example #43
from scipy.stats import t
print(t.pdf(2,3))
# Enter your code here.
X = pisa["year"]
Xbar = X.mean()
ssX = ((X - Xbar)**2).sum()
n = pisa.shape[0]
s2b1 = SSE/(ssX * (n-2))

## 11. T-Distribution ##

from scipy.stats import t

# 100 values between -3 and 3
x = np.linspace(-3,3,100)

# Compute the pdf with 3 degrees of freedom
print(t.pdf(x=x, df=3))

## 12. Statistical Significance of Coefficients ##

# The variable s2b1 is in memory.  The variance of beta_1
tstat = linearfit.params["year"]/(s2b1**0.5)

## 13. The P-Value ##

# At the 95% confidence level for a two-sided t-test we use the 0.975 quantile of the t distribution
pval = 0.975

# The degrees of freedom
df = pisa.shape[0] - 2

# The probability to test against
Example #45
def confidence_interval():

    # --------------------------------------
    # 95% confidence interval - Student t.
    # sigma unknown - Anderson 2008 p. 310.
    # http://adventuresinpython.blogspot.com.br/2012/12/confidence-intervals-in-python.html

    # Levine p. 292, exercise 8.14.
    s = np.array([1, 2, 3, 4, 5, 6, 20])

    # extract parameters from the sample vector.
    n, min_max, mean, var, skew, kurt = stats.describe(s)
    print('\ndifferent: var extracted by stats.describe(s) vs np.var(s)')
    print('denominator (n-1) = ', str(var), ', denominator (n) = ', str(np.var(s)), '\n')

    # set parameters.
    s_amostra = math.sqrt(var)
    n_amostra = n
    mu_amostra = mean
    alpha = 0.05

    # compute the t scores for the critical points:
    # the percent point function (ppf) inverts the cdf,
    # i.e. it returns the x value of the point on the Student curve.
    # (n_amostra - 1) is the number of degrees of freedom.
    # norm.ppf corresponds to norminv and norm.pdf to normpdf.
    tinf = t.ppf(alpha/2,n_amostra-1)
    tsup = t.ppf(1-(alpha/2),n_amostra-1)

    
    xis_inf = mu_amostra + (tinf * (s_amostra / math.sqrt(n_amostra)))
    xis_sup = mu_amostra + (tsup * (s_amostra / math.sqrt(n_amostra)))

    print(xis_inf, xis_sup)

    # ---------------------------------------------
    # draw the curve based on the normal distribution.
    # ---------------------------------------------
    mu = mu_amostra
    sigma = s_amostra
    limite_inferior = xis_inf
    limite_superior = xis_sup

    xa = np.linspace(mu-(4*sigma),limite_inferior)
    xb = np.linspace(limite_inferior,limite_superior)
    xc = np.linspace(limite_superior,mu+(4*sigma))
    
    x = np.concatenate((xa,xb,xc), axis=0)

    ya = norm.pdf(xa,mu,sigma)
    yb = norm.pdf(xb,mu,sigma)
    yc = norm.pdf(xc,mu,sigma)

    y = np.concatenate((ya,yb,yc), axis=0)

    pf_plota.plotar('Confidence Interval','t student scores','x bar', \
           'probabilidade','pdf',x,y,limite_inferior, \
           limite_superior,sigma,mu,xa,ya,xc,yc,alpha,np.array([]),n_amostra)
    
    # ----------------------------------------------
    # draw the curve based on the Student distribution.
    # ----------------------------------------------
    mu = mu_amostra
    sigma = s_amostra
    limite_inferior = tinf
    limite_superior = tsup
    
    xa = np.linspace(-4,limite_inferior)
    xb = np.linspace(limite_inferior,limite_superior)
    xc = np.linspace(limite_superior,4)
    
    x = np.concatenate((xa,xb,xc), axis=0)
    
    ya = t.pdf(xa,n_amostra-1)
    yb = t.pdf(xb,n_amostra-1)
    yc = t.pdf(xc,n_amostra-1)
    
    y = np.concatenate((ya,yb,yc), axis=0)

    za = norm.pdf(xa)
    zb = norm.pdf(xb)
    zc = norm.pdf(xc)
    
    z = np.concatenate((za,zb,zc), axis=0)
    
    pf_plota.plotar('Confidence Interval','t student scores','x bar', \
               'probabilidade','pdf',x,y,limite_inferior, \
               limite_superior,sigma,mu,xa,ya,xc,yc,alpha,z,n_amostra)
Example #46
a = np.random.randn(30)
outliers = np.array([8, 8.75, 9.5])
pl.hist(a, 7, weights=[1 / 30] * 30, rwidth=0.8)

#fit without outliers
x = np.linspace(-5, 10, 500)

loc, scale = norm.fit(a)
n = norm.pdf(x, loc=loc, scale=scale)

loc, scale = laplace.fit(a)
l = laplace.pdf(x, loc=loc, scale=scale)

fd, loc, scale = t.fit(a)
s = t.pdf(x, fd, loc=loc, scale=scale)
pl.plot(x, n, 'k>',
        x, s, 'r-',
        x, l, 'b--')
pl.legend(('Gauss', 'Student', 'Laplace'))
pl.savefig('robustDemo_without_outliers.png')

#add the outliers
pl.figure()
pl.hist(a, 7, weights=[1 / 33] * 30, rwidth=0.8)
pl.hist(outliers, 3, weights=[1 / 33] * 3, rwidth=0.8)
aa = np.hstack((a, outliers))

loc, scale = norm.fit(aa)
n = norm.pdf(x, loc=loc, scale=scale)
Example #47
            counts[k] -= 1

            # If this was the last data point in this cluster, delete it
            if counts[k] == 0:
                del counts[k]
                del clusters[k]
                # Update the index of all the other assignments
                assignments[assignments > k] -= 1

            # Calculate the weight for a new cluster
            # See Escobar and West (1995) for details on why this is the weight.
            # See the Wikipedia page on conjugate priors for the form of the Student's t
            # distribution.
            new_cluster_posterior = cluster_prior.posterior(np.array([y]))
            t_scale = new_cluster_posterior.b * (new_cluster_posterior.nu + 1) / (new_cluster_posterior.a * new_cluster_posterior.nu)
            new_cluster_weight = ALPHA * t.pdf(y, 2. * new_cluster_posterior.a, loc=new_cluster_posterior.mu, scale=t_scale)

            # Calculate the weight for all the other clusters
            z = [counts[k] * norm.pdf(y, kmean, kstdev) for k,(kmean, kstdev) in enumerate(clusters)]
            z.append(new_cluster_weight)
            weights = np.array(z)

            # Draw a new assignment proportional to the cluster weights
            k = weighted_sample(weights)
            assignments[i] = k

            # If we sampled a new cluster
            if k == len(clusters):
                # We need to sample the parameters from the prior
                # TODO: should we instead sample from the posterior with the one sample?
                kmean, kstdev = cluster_prior.sample()
mu = 0. # the mean, mu
nus = [1., 2., 5, 10, 100] # degrees of freedom, nu
markers = ['b-', 'r-', 'm-', 'c-', 'g-']

x = np.linspace(-6, 6, 1000) # x

# set plot to render labels using latex
pl.rc('text', usetex=True)
pl.rc('font', family='serif')
pl.rc('font', size=14)
fig = pl.figure(figsize=(6,5), dpi=100)

# plot pdfs
for i, nu in enumerate(nus):
  pl.plot(x, t.pdf(x, nu), markers[i], label='$\\nu=%d$'%nu)

# plot a Gaussian for comparison
pl.plot(x, norm.pdf(x, mu, 1.), 'k--', label='$N(0,1)$')

ax = pl.gca()
ax.set_xlabel('$t$', fontsize=14)
ax.set_ylabel('$p(t)$', fontsize=14)

ax.legend(loc='best', frameon=False)

fig.subplots_adjust(bottom=0.15)

pl.savefig('../studentst.pdf')
pl.show()
Example #49
def test_sprot():
    algn = read_free(sprot_file)
    # truncate alignments to sequence positions with
    # gap frequency no greater than 20% - to avoid over-representation of gaps
    # alignments = truncate(algn, FRAC_ALPHA_CUTOFF)
    # print alignments.shape
    pdb_res_list = read_pdb(SPROT_PDB_FILE, 'E')
    msa_algn = msa_search(pdb_res_list, algn)
    print(msa_algn)
    sca_algn = sca(algn)
    algn_shape = get_algn_shape(algn)
    no_pos = algn_shape.no_pos
    no_seq = algn_shape.no_seq
    no_aa = algn_shape.no_aa

    print('Testing SCA module :')
    print('algn_3d_bin hash :' + str(np.sum(np.square(sca_algn.algn_3d_bin))))
    print('weighted_3d_algn hash :' +
          str(np.sum(np.square(sca_algn.weighted_3d_algn))))
    print('weight hash : ' + str(np.sum(np.square(sca_algn.weight))))
    print('pwX hash : ' + str(np.sum(np.square(sca_algn.pwX))))
    print('pm hash : ' + str(np.sum(np.square(sca_algn.pm))))
    print('Cp hash : ' + str(np.sum(np.square(sca_algn.Cp))))
    print('Cs hash : ' + str(np.sum(np.square(sca_algn.Cs))))
    spect = spectral_decomp(sca_algn, 100)
    print('spect lb hash : ' + str(np.sum(np.square(spect.pos_lbd))))
    print('spect ev hash : ' + str(np.sum(np.square(spect.pos_ev))))
    print('spect ldb_rnd hash : ' + str(np.sum(np.square(spect.pos_lbd_rnd))))
    print('spect ev hash : ' + str(np.sum(np.square(spect.pos_ev_rnd))))

    svd_output = LA.svd(sca_algn.pwX)
    U = svd_output[0]
    sv = svd_output[1]
    V = svd_output[2]

    # perform independent components calculations
    kmax = 8
    learnrate = 0.0001
    iterations = 20000
    w = ica(transpose(spect.pos_ev[:, 0:kmax]), learnrate, iterations)
    ic_P = transpose(dot(w, transpose(spect.pos_ev[:, 0:kmax])))

    print "ic_P hash :" + str(mat_sum(square(ic_P)))
    # calculate the matrix Pi = U*V'
    # this provides a mathematical mapping between
    # positional and sequence correlation

    n_min = min(no_seq, no_pos)
    Pi = dot(U[:, 0:n_min-1], transpose(V[:, 0:n_min-1]))
    U_p = dot(Pi, spect.pos_ev)

    p_cutoff = 0.9
    nfit = 3
    cutoffs = zeros((nfit, 1))
    sector_def = []

    for i in range(0, nfit):
        nu, mu, sigma = t.fit(ic_P[:, i])
        q75, q25 = percentile(ic_P[:, i], [75, 25])
        iqr = q75 - q25
        binwidth = 2*iqr*pow(size(ic_P[:, i]), -1/3.0)  # Freedman-Diaconis rule
        nbins = round(ptp(ic_P[:, i])/binwidth)
        yhist, xhist = histogram(ic_P[:, i], nbins)
        x_dist = arange(min(xhist), max(xhist), (max(xhist) - min(xhist))/100)
        cdf_jnk = t.cdf(x_dist, nu, mu, sigma)
        pdf_jnk = t.pdf(x_dist, nu, mu, sigma)
        maxpos = argmax(pdf_jnk)
        tail = zeros((1, size(pdf_jnk)))
        if abs(max(ic_P[:, i])) > abs(min(ic_P[:, i])):
            tail[:, maxpos:] = cdf_jnk[maxpos:]
        else:
            tail[:, 0:maxpos] = cdf_jnk[0:maxpos]
        x_dist_pos = argmin(abs(tail - p_cutoff))
        cutoffs[i] = x_dist[x_dist_pos]
        sector_def.append(array(where(ic_P[:, i] > cutoffs[i])[0])[0])
    print(sector_def)
Example #50
def sampling_distribution():
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    df = 50
    x=np.linspace(t.ppf(0.01, df), t.ppf(0.99, df), 100)
    ax.plot(x, t.pdf(x, df))
Example #51
#!/usr/bin/env python

import numpy as np
import matplotlib.pyplot as pl
from scipy.stats import t, laplace, norm

x = np.linspace(-4, 4, 100)
n = norm.pdf(x, loc=0, scale=1)
l = laplace.pdf(x, loc=0, scale=1 / (2 ** 0.5))
t = t.pdf(x, df=1, loc=0, scale=1)

pl.plot(n, 'k:',
        t, 'b--',
        l, 'r-')
pl.legend(('Gauss', 'Student', 'Laplace'))
pl.savefig('studentLaplacePdfPlot_1.png')

pl.figure()
pl.plot(np.log(n), 'k:',
        np.log(t), 'b--',
        np.log(l), 'r-')
pl.legend(('Gauss', 'Student', 'Laplace'))
pl.savefig('studentLaplacePdfPlot_2.png')

pl.show()
Example #52
Y = Data[1]    #pull out the dependent variables
Coefficients = np.linalg.inv(X.T.dot(X)).dot(X.T.dot(Y))    #use the classic formula
print(Coefficients)    #compare the estimated coefficients with the real coefficients

#another way to do this using mle:
X = Data[0]    #pull out the explanatory variables and transpose it
Y = Data[1]    #pull out the dependent variables
b = np.random.uniform(size = len(X[0]) + 1)*0.1    #generate random starting values
Coefficients = minimize(OLS_mle, x0 = b, args = (X,Y), method = 'BFGS').x    #optimize the adaptive lasso

#compute standard errors
s2 = sum((Y - X.dot(Coefficients[0:n_vars+1]))**2)/(N-(n_vars + 1))    #estimate s2
var_hat = s2*np.linalg.inv(X.T.dot(X))    #compute var hat
Standard_Error = (np.diag(var_hat))**0.5    #compute the standard errors
t_stat = Coefficients[0:n_vars+1]/Standard_Error    #compute the t statistics for each variable
p_values = 2*t.sf(abs(t_stat), df = N - 1)    #compute the p-values (two-sided, from the upper tail)
tm(Coefficients,Standard_Error,t_stat,p_values)    #call the table making function to compute the table






#compare r2 as the number of features goes up
r2 = np.zeros(6)    #an array to hold the r2s
r2_adj = np.zeros(6)    #an array to hold the r2_adjs
n_f = np.array([2,20,40,60,80,100])    #an array of # of features
for i in range(len(n_f)):    #loop over features
    r2r2_adj = n_features(n_f[i])    #run model and extract r2
    r2[i] = r2r2_adj[0]    #pull out r2
    r2_adj[i] = r2r2_adj[1]    #pull out r2_adj
Example #53
# use np.abs to get upper tail
p = st.distributions.t.sf(np.abs(t), df) * 2  
print("Probability of sample outcome by chance: ", p)

alpha = 0.05
if p < alpha:
    print("Significant")
else:
    print("Not signficant")

# <codecell>

from scipy.stats import t

x = np.linspace(t.ppf(0.0001, df), t.ppf(0.9999, df), 100)
plt.plot(x, t.pdf(x, df), color=isseorange, alpha=0.9, label='t pdf')
plt.fill_between(x, t.pdf(x, df), facecolor=isseorange, alpha = 0.4)
plt.xlabel('Probability distribution over t values')
plt.legend(loc='best', frameon=False)#
plt.title('Degrees of freedom 298')
plt.savefig('student-t.pdf')

# <codecell>

x = np.linspace(t.ppf(9.92242823716e-161, df), t.ppf(0.999999999, df), 100)
plt.plot(x, t.pdf(x, df), color=isseorange, alpha=0.9, label='t pdf')
plt.fill_between(x, t.pdf(x, df), facecolor=isseorange, alpha = 0.4)
plt.xlabel('Probability distribution over t values')
plt.legend(loc='best', frameon=False)
plt.savefig('student-t2.pdf')
Example #54
def studentv(x, normf, mu, sig, skew, nu):
    if ( (x <3.05) & (x>2.0) ):
        return 2.*t.pdf((x-mu)/sig, nu)*t.cdf(skew*((x-mu)/sig)*np.sqrt((nu+1.0)/(nu+(x-mu)*(x-mu)/sig/sig)),(nu+1.0))/sig*normf
    else:
        return 0.
Example #55
# 
# The density functions of the t-distribution are used in significance testing. The probability density function (pdf) models the relative likelihood of a continuous random variable. The cumulative distribution function (cdf) models the probability of a random variable being less than or equal to a point. The degrees of freedom (df) account for the number of observations in the sample. In general the degrees of freedom will be equal to the number of observations minus 2. Say we had a sample with just 2 observations: we could fit a line through them perfectly, with no error in the model. To account for this we subtract 2 from the number of observations to compute the degrees of freedom.
# 
# Scipy has functions in the module scipy.stats.t which can be used to compute the pdf and cdf of the t-distribution for any number of degrees of freedom. scipy.stats.t.pdf(x,df) is used to evaluate the pdf at x with df degrees of freedom.

# In[11]:

from scipy.stats import t
import matplotlib.pyplot as plt
get_ipython().magic('matplotlib inline')

# 100 values between -3 and 3.
x = np.linspace(-3,3,100)

# Compute the pdf with 3 degrees of freedom.
print("t.pdf:\n", t.pdf(x=x, df=3))
# Pdf with 3 degrees of freedom.
tdist3 = t.pdf(x=x, df=3)
# Pdf with 30 degrees of freedom.
tdist30 = t.pdf(x=x, df=30)

# Plot pdfs
plt.plot(x, tdist3)
plt.plot(x, tdist30)


# ###12: Statistical Significance of Coefficients

# Now that we know what the t-distribution is, we can use it for significance testing. To do significance testing we must first state our hypothesis. We want to test whether the lean of the tower depends on the year, i.e. every year the tower leans a certain amount. This is done by setting null and alternative hypotheses. In our case the null hypothesis is that the lean of the Tower of Pisa does not depend on the year, meaning the coefficient is equal to zero. The alternative hypothesis then follows: the lean of the tower does depend on the year, i.e. the coefficient is not equal to zero. These are written mathematically as,
# 
# $H_0: \beta_1=0$
# $H_1: \beta_1\neq0$
## 10. Variance of Coefficients ##

# Enter your code here.

s2b1 = SSE / ((pisa.shape[0] - 2) * ((pisa['year'] - pisa['year'].mean())**2).sum())
print(s2b1)

## 11. T-Distribution ##

from scipy.stats import t

# 100 values between -3 and 3
x = np.linspace(-3,3,100)

# Compute the pdf with 3 degrees of freedom
print(t.pdf(x=x, df=3))

plt.plot(x, t.pdf(x=x, df=3))
plt.show()

## 12. Statistical Significance of Coefficients ##

# The variable s2b1 is in memory.  The variance of beta_1

tstat = abs(linearfit.params.year) / (s2b1) ** (1/2)
print(tstat)

## 13. The P-Value ##

# At the 95% confidence level for a two-sided t-test we use the 0.975 quantile of the t distribution
pval = 0.975
Example #57
        gl.plot(x_grid, y_values, color = color, fill = 1, alpha = 0.1)
                   
    gl.set_zoom(ax = ax1, X = ret1,xlimPad = [0.1,0.1])

    gl.savefig(folder_images +'InitPointsInferenceDaysEstimation.png', 
               dpi = 100, sizeInches = [10, 4])
               

if (t_distribution_graph):
    gl.init_figure()
    x_grid = np.linspace(-4,4,100)
    dfs = [1,3,5,26]
    
              
    for df in dfs:
        t_pdf = t.pdf(x_grid, df) 

        color = gl.get_color()
        ax1 = gl.plot(x_grid, t_pdf, alpha = 1, lw = 3, AxesStyle = "Normal",
                   legend = ["df %i"%df],color = color,
                labels = ["t-distribution","t","pdf(t)"])

    color = "k";
    x_grid, y_values = bMA.gaussian1D_points(mean = 0, std = 1, num = 100, x_grid = x_grid)        
    gl.plot(x_grid, y_values, alpha = 0.1, lw = 3, AxesStyle = "Normal",
               legend = ["Guassian"],color = color, fill = 1)

    gl.set_zoom(ax = ax1, X = x_grid,xlimPad = [0.1,0.1])

    gl.savefig(folder_images +'t-distribution.png', 
               dpi = 100, sizeInches = [14,6])
Example #58
    p_r = np.zeros(shape=(n + 1, n))  # allocate memory to store the entries of p(r_t|x_{1:t})
    map_p_r = np.empty(shape=(n,))  # allocate memory to store the MAP estimate of p(r_t|x_{1:t})

    # BOCPD initialization
    k_n = np.array([k_0])
    alpha_n = np.array([alpha_0])
    mu_n = np.array([mu_0])
    beta_n = np.array([beta_0])
    x_sum = np.array([0.0])
    x2_sum = np.array([0.0])
    p_r_x = np.array([1.0])  # p(r_t, x_{1:t}), t = 0 := p(r_0 = 0) = 1

    # start BOCPD loop
    for i, x_i in enumerate(x):  # observe new datum
        # compute the predictive probabilities p(x_t|r_{t-1}, x_{t-r:t-1})
        p_x = student_t.pdf(x_i, 2.0 * alpha_n, mu_n, np.sqrt(beta_n * (k_n + 1.0) / (alpha_n * k_n)))

        # compute the growth probabilities p(r_t != 0, x_{1:t})
        p_rx_x = (1.0 - 1.0 / l) * p_x * p_r_x

        # compute the changepoint probability, p(r_t = 0, x_{1:t})
        p_r0_x = (1.0 / l) * np.dot(p_x, p_r_x)

        # update the probability distribution p(r_t, x_{1:t}) and normalize it to obtain
        # p(r_t|x_{1:t})
        p_r_x = np.append(p_r0_x, p_rx_x)
        p_r_x = p_r_x / np.sum(p_r_x)

        # keep the result in memory
        p_r[0 : i + 2, i] = p_r_x  # p(r_t|x_{1:t})
        map_p_r[i] = p_r_x.argmax()  # argmax r_t p(r_t|x_{1:t})
# VARIANCE OF COEFFICIENTS
# Compute SSE
SSE = np.sum((y.values - yhat)**2)
# Compute variance in X
xvar = np.sum((pisa.year - pisa.year.mean())**2)
# Compute variance in b1 
s2b1 = (SSE / (y.shape[0] - 2)) / xvar

# T-DISTRIBUTION
from scipy.stats import t

# 100 values between -3 and 3
x = np.linspace(-3,3,100)

# Compute the pdf with 3 degrees of freedom
print(t.pdf(x=x, df=3))
# Pdf with 3 degrees of freedom
tdist3 = t.pdf(x=x, df=3)
# Pdf with 30 degrees of freedom
tdist30 = t.pdf(x=x, df=30)

# Plot pdfs
plt.plot(x, tdist3)
plt.plot(x, tdist30)

# STATISTICAL SIGNIFICANCE OF COEFFICIENTS
tstat = linearfit.params["year"] / np.sqrt(s2b1)

# P-VALUE
# At the 95% confidence level for a two-sided t-test we use the 0.975 quantile of the t distribution
pval = 0.975