コード例 #1
0
def test_correct_labels(close_figures, reset_randomstate, line, x_size, y_size,
                        labels):
    """Check the axis labels that ``qqplot_2samples`` puts on its plot.

    Two samples of sizes ``x_size`` and ``y_size`` are drawn from a seeded
    generator and plotted against each other.  When ``labels`` is empty the
    axis labels are expected to contain "1st"/"2nd"; otherwise they are
    expected to contain "X"/"Y" (presumably the values supplied through
    ``labels`` -- verify against the parametrization).  When ``x_size`` does
    not exceed ``y_size`` the two labels are expected in swapped positions.
    """
    rng = np.random.RandomState(9876554)
    sample_x = rng.normal(loc=0, scale=0.1, size=x_size)
    sample_y = rng.standard_t(3, size=y_size)

    fig = qqplot_2samples(sm.ProbPlot(sample_x), sm.ProbPlot(sample_y),
                          line=line, **labels)
    axis = fig.get_axes()[0]

    # Marker expected for the first and the second sample respectively.
    if labels:
        first_marker, second_marker = "X", "Y"
    else:
        first_marker, second_marker = "1st", "2nd"

    # With x no larger than y the first sample is expected on the y-axis.
    if x_size <= y_size:
        expected_on_x, expected_on_y = second_marker, first_marker
    else:
        expected_on_x, expected_on_y = first_marker, second_marker

    assert expected_on_x in axis.get_xlabel()
    assert expected_on_y in axis.get_ylabel()
コード例 #2
0
ファイル: plot.py プロジェクト: wcarthur/extremes
def plotDiagnostics(data, mu, xi, sigma, figfile):
    """
    Draw a 2x2 grid of diagnostic panels for the fitted distribution and
    save it to ``figfile``.

    The four panels are a probability plot, a quantile plot, a return
    level plot and a density plot.

    :param data: :class:`numpy.ndarray` of observed data values (in units
                 of metres/second).
    :param float mu: Selected threshold value.
    :param float xi: Fitted shape parameter.
    :param float sigma: Fitted scale parameter.
    :param str figfile: Path to store the file (includes image format)

    """
    LOG.info("Plotting diagnostics")
    _figure, grid = plt.subplots(2, 2)
    prob_ax, quant_ax, level_ax, dens_ax = grid.flatten()

    # Only the observations above the threshold are used for the
    # probability, quantile and density panels.
    exceedances = np.sort(data[data > mu])
    density = fittedPDF(data, mu, xi, sigma)

    # Panel 1: probability plot
    sm.ProbPlot(exceedances).ppplot(xlabel="Empirical",
                                    ylabel="Model",
                                    ax=prob_ax,
                                    line='45')
    prob_ax.set_title("Probability plot")

    # Panel 2: quantile plot against ``genpareto`` with the fitted parameters
    model_plot = sm.ProbPlot(exceedances,
                             genpareto,
                             distargs=(xi, ),
                             loc=mu,
                             scale=sigma)
    model_plot.qqplot(xlabel="Model",
                      ylabel="Empirical",
                      ax=quant_ax,
                      line='45')
    quant_ax.set_title("Quantile plot")

    # Panel 3: fitted return levels versus empirical return periods
    periods = np.array(
        [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000])
    exceedance_rate = float(len(exceedances)) / float(len(data))
    levels = returnLevels(periods, mu, xi, sigma, exceedance_rate)

    empirical_periods = empiricalReturnPeriod(np.sort(data))
    beyond_one = empirical_periods > 1
    level_ax.semilogx(periods, levels, label="Fitted RP curve", color='r')
    level_ax.scatter(empirical_periods[beyond_one],
                     np.sort(data)[beyond_one],
                     color='b',
                     label="Empirical RP",
                     s=100)
    level_ax.legend(loc=2)
    level_ax.set_xlabel("Return period")
    level_ax.set_ylabel("Return level")
    level_ax.set_title("Return level plot")
    level_ax.grid(True)

    # Panel 4: histogram of the exceedances with the fitted density on top
    upper_edge = 4 * np.ceil(np.floor(data.max() / 4) + 1)
    sns.distplot(exceedances,
                 bins=np.arange(mu, upper_edge, 2),
                 hist=True,
                 axlabel='Wind speed (m/s)',
                 ax=dens_ax)
    dens_ax.plot(exceedances, density, color='r')
    dens_ax.set_title("Density plot")

    plt.tight_layout()
    plt.savefig(figfile)
    plt.close()
コード例 #3
0
 def setup(self):
     """Fit an OLS model on the Longley data and build the plot fixtures.

     Stores the dataset, the fitted model, its residuals, a ``ProbPlot`` of
     the residuals against ``stats.t`` (``distargs=(4,)``), and a second
     ``ProbPlot`` built from normal random draws of the same shape.
     """
     longley = sm.datasets.longley.load(as_pandas=False)
     # The constant column is appended rather than prepended.
     longley.exog = sm.add_constant(longley.exog, prepend=False)
     self.data = longley

     fitted = sm.OLS(longley.endog, longley.exog).fit()
     self.mod_fit = fitted
     self.res = fitted.resid
     self.prbplt = sm.ProbPlot(fitted.resid, stats.t, distargs=(4,))

     comparison_shape = self.prbplt.data.shape
     self.other_array = np.random.normal(size=comparison_shape)
     self.other_prbplot = sm.ProbPlot(self.other_array)
コード例 #4
0
 def qq_plot_2samples(self):
     """
     Draw a two-sample Q-Q plot of ``self.column_data`` against
     ``self.var_data`` on a new subplot of ``self.figure`` and redraw the
     canvas.

     :return: None; the plot is drawn on ``self.ax``
     """
     self.ax = self.figure.add_subplot(111)
     # ``Axes.hold`` was deprecated in matplotlib 2.0 and removed in 3.0,
     # where drawing is always additive.  Only call it where it still
     # exists so the method works on both old and new versions.
     if hasattr(self.ax, 'hold'):
         self.ax.hold(True)
     pp_x = sm.ProbPlot(self.column_data)
     pp_y = sm.ProbPlot(self.var_data)
     qqplot_2samples(pp_x, pp_y, ax=self.ax)
     self.canvas.draw()
コード例 #5
0
ファイル: test_gofplots.py プロジェクト: zhisheng/statsmodels
def test_ProbPlot_comparison_arrays():
    """Smoke test: ``qqplot`` and ``ppplot`` accept a raw array as ``other``."""
    # two fake samples for comparison, drawn with identical settings
    sample_settings = dict(loc=8.25, scale=3.25, size=37)
    x = np.random.normal(**sample_settings)
    y = np.random.normal(**sample_settings)
    pp_x = sm.ProbPlot(x)
    # Constructed but not used further; only the raw array ``y`` is passed on.
    pp_y = sm.ProbPlot(y)

    # test `other` kwarg with array
    for draw in (pp_x.qqplot, pp_x.ppplot):
        draw(other=y)
    plt.close('all')
コード例 #6
0
ファイル: test_gofplots.py プロジェクト: zhisheng/statsmodels
def test_ProbPlot_comparison():
    """Smoke test: ``qqplot`` and ``ppplot`` accept a ``ProbPlot`` as ``other``."""
    # two fake samples for comparison, drawn with identical settings
    sample_settings = dict(loc=8.25, scale=3.25, size=37)
    x = np.random.normal(**sample_settings)
    y = np.random.normal(**sample_settings)
    pp_x = sm.ProbPlot(x)
    pp_y = sm.ProbPlot(y)

    # test `other` kwarg with `ProbPlot` instance
    for draw in (pp_x.qqplot, pp_x.ppplot):
        draw(other=pp_y)

    plt.close('all')
コード例 #7
0
ファイル: test_gofplots.py プロジェクト: zhisheng/statsmodels
def test_qqplot_2samples_arrays():
    """Smoke test: ``sm.qqplot_2samples`` runs on raw arrays for each ``line``."""
    sample_settings = dict(loc=8.25, scale=3.25, size=37)
    x = np.random.normal(**sample_settings)
    y = np.random.normal(**sample_settings)

    # The ProbPlot instances are constructed but not used further; only the
    # raw arrays are plotted below.
    pp_x = sm.ProbPlot(x)
    pp_y = sm.ProbPlot(y)

    # every value tried for `line`
    line_options = ('r', 'q', '45', 's')
    for option in line_options:
        # test with arrays
        sm.qqplot_2samples(x, y, line=option)

    plt.close('all')
コード例 #8
0
def normality_of_residuals_test(model):
    '''
    Draw the normal Q-Q plot of a model's residuals and print the results of
    four normality tests: Jarque-Bera, Shapiro-Wilk, Kolmogorov-Smirnov and
    Anderson-Darling.

    Arg:
    * model - fitted OLS model from statsmodels; only its ``resid``
      attribute is read here
    '''
    residuals = model.resid

    qq_source = sm.ProbPlot(residuals)
    qq_source.qqplot(line='s')
    plt.title('Q-Q Plot')

    jarque_bera = stats.jarque_bera(residuals)
    shapiro_wilk = stats.shapiro(residuals)
    anderson_darling = stats.anderson(residuals, dist='norm')
    # NOTE(review): 'norm' without extra arguments is the standard normal,
    # and the residuals are not standardized here -- confirm this is intended.
    kolmogorov_smirnov = stats.kstest(residuals, 'norm')

    jb_stat, jb_pvalue = jarque_bera[0], jarque_bera[1]
    sw_stat, sw_pvalue = shapiro_wilk[0], shapiro_wilk[1]
    ks_stat, ks_pvalue = (kolmogorov_smirnov.statistic,
                          kolmogorov_smirnov.pvalue)
    # Index 2 of the critical values is reported as the 5% level.
    ad_stat, ad_critical = (anderson_darling.statistic,
                            anderson_darling.critical_values[2])

    report = (
        f'Jarque_Bera test ---- statistic: {jb_stat:.4f}, p-value: {jb_pvalue}',
        f'Shapiro_Wilk test ---- statistic: {sw_stat:.4f}, p-value: {sw_pvalue:.4f}',
        f'Kolmogorov_Smirnov test ---- statistic: {ks_stat:.4f}, p-value: {ks_pvalue:.4f}',
        f'Anderson_Darling test ---- statistic: {ad_stat:.4f}, 5% critical value: {ad_critical:.4f}',
    )
    for entry in report:
        print(entry)
コード例 #9
0
def pp_plot(residual_values, *, ax=None):
    """Draw a P-P plot of model residuals with a 45-degree reference line.

    A P-P plot compares the empirical cumulative distribution function
    with the theoretical one.  The ``ProbPlot`` is built with ``fit=True``
    and its default comparison distribution.

    Args:
        residual_values (array): array of residuals from a model
        ax (Axes object): Matplotlib Axes to draw on (optional, keyword
            only); the current axes are used when omitted

    Returns:
        Figure object returned by ``ProbPlot.ppplot``
    """
    target_ax = plt.gca() if ax is None else ax

    figure = sm.ProbPlot(residual_values, fit=True).ppplot(
        ax=target_ax, color='tab:blue', markersize=4, line='45')

    target_ax.grid(True, linewidth=0.5)
    target_ax.set_title("P-P Plot of Residuals")
    return figure
コード例 #10
0
ファイル: Part4_GARCH.py プロジェクト: sdtcsyl/MFE-Paper
def garch_plot1(lh):
    """Show a 3x2 grid of time-series diagnostics for ``lh``.

    Row one holds the raw series, row two the ACF and PACF stem plots
    (40 lags each) with dashed lines at +/- 1.96 / sqrt(len(lh)), and row
    three the P-P and Q-Q plots of ``lh`` against ``'lognorm'`` with
    ``fit=True``.  The figure is displayed with ``plt.show()``.
    """
    layout = (3, 2)

    def _stem_with_bounds(values, bound, label):
        # Stem plot with a zero line and a pair of symmetric dashed bounds.
        plt.stem(values)
        plt.axhline(y=0, linestyle='-', color='black')
        plt.axhline(y=-bound, linestyle='--', color='gray')
        plt.axhline(y=bound, linestyle='--', color='gray')
        plt.ylabel(label)

    # Plot figure with subplots of different sizes
    fig = plt.figure(1)
    # set up subplot grid
    gridspec.GridSpec(3, 2)

    # full-width panel with the series itself
    plt.subplot2grid(layout, (0, 0), colspan=2, rowspan=1)
    plt.title('Lean Hogs Time Series Analysis Plots')
    plt.plot(lh)

    bound = 1.96 / np.sqrt(len(lh))

    # autocorrelation panel
    plt.subplot2grid(layout, (1, 0))
    _stem_with_bounds(acf(lh, nlags=40), bound, 'ACF')

    # partial autocorrelation panel
    plt.subplot2grid(layout, (1, 1))
    _stem_with_bounds(pacf(lh, nlags=40, method='ols'), bound, 'PACF')

    # probability plots in the bottom row
    pp_axis = plt.subplot2grid(layout, (2, 0))
    qq_axis = plt.subplot2grid(layout, (2, 1))
    fitted = sm.ProbPlot(lh, dist='lognorm', fit=True)
    fitted.ppplot(line='45', ax=pp_axis)
    fitted.qqplot(line='45', ax=qq_axis)
    pp_axis.set_title('P-P Plot')
    qq_axis.set_title('Q-Q Plot')
    plt.show()
コード例 #11
0
def normality_of_residuals_test(model):
    '''
    Draw the normal Q-Q plot of a model's residuals and print the results of
    four normality tests: Jarque-Bera, Shapiro-Wilk, Kolmogorov-Smirnov and
    Anderson-Darling, followed by a hint on reading the Anderson-Darling
    statistic.

    Arg:
    * model - fitted OLS model from statsmodels; only its ``resid``
      attribute is read here
    '''
    residuals = model.resid

    qq_source = sm.ProbPlot(residuals)
    qq_source.qqplot(line='s')
    plt.title('Q-Q plot')

    jarque_bera = stats.jarque_bera(residuals)
    shapiro_wilk = stats.shapiro(residuals)
    anderson_darling = stats.anderson(residuals, dist='norm')
    # NOTE(review): 'norm' without extra arguments is the standard normal,
    # and the residuals are not standardized here -- confirm this is intended.
    kolmogorov_smirnov = stats.kstest(residuals, 'norm')

    jb_stat, jb_pvalue = jarque_bera[0], jarque_bera[1]
    sw_stat, sw_pvalue = shapiro_wilk[0], shapiro_wilk[1]
    ks_stat, ks_pvalue = (kolmogorov_smirnov.statistic,
                          kolmogorov_smirnov.pvalue)
    # Index 2 of the critical values is reported as the 5% level.
    ad_stat, ad_critical = (anderson_darling.statistic,
                            anderson_darling.critical_values[2])

    report = (
        f'Jarque-Bera test ---- statistic: {jb_stat:.4f}, p-value: {jb_pvalue}',
        f'Shapiro-Wilk test ---- statistic: {sw_stat:.4f}, p-value: {sw_pvalue:.4f}',
        f'Kolmogorov-Smirnov test ---- statistic: {ks_stat:.4f}, p-value: {ks_pvalue:.4f}',
        f'Anderson-Darling test ---- statistic: {ad_stat:.4f}, 5% critical value: {ad_critical:.4f}',
        'If the returned AD statistic is larger than the critical value, then for the 5% significance level, the null hypothesis that the data come from the Normal distribution should be rejected. ',
    )
    for entry in report:
        print(entry)
コード例 #12
0
    def norm_plot(self, x):
        '''Draw a Q-Q plot and a histogram of a variable side by side
        to visualize how close it is to a normal distribution.

        Parameters:
        ----------
        x : numpy.ndarray
            The variable to plot; it is flattened with ``reshape(-1)``
            for the Q-Q plot and passed unchanged to the histogram

        Returns:
        -------
        ax1 : matplotlib.axes
            Axes holding the Q-Q plot of variable x
        ax2 : matplotlib.axes
            Axes holding the histogram of variable x

        Notes:
        -----
        The 45-degree reference line is added with ``sm.qqline``.'''

        fig, (qq_axis, hist_axis) = plt.subplots(1, 2, figsize=(8, 3))

        flattened = x.reshape(-1)
        qq_figure = sm.ProbPlot(flattened, fit=True).qqplot(
            marker='o', color='coral', ax=qq_axis)
        # The line is drawn on the first axes of the figure returned above.
        sm.qqline(qq_figure.axes[0], line='45', fmt='g--')

        hist_axis.hist(x, color='orange', alpha=.6)

        return qq_axis, hist_axis
コード例 #13
0
def animate(i):
    """Redraw all panels from the current database contents.

    Reads the ``means``, ``shap_p_values_text`` and ``main_data`` tables
    through the module-level ``connection`` and redraws the module-level
    axes ``ax0`` .. ``ax4``: a histogram of the means, the latest p-value as
    text, a Q-Q plot of the means, the p-value series, and a histogram of
    the main values.

    :param i: frame argument supplied by the caller (presumably a matplotlib
        animation -- verify); it is not used.
    """
    # Clear every axes that is redrawn below.  ax3 is cleared as well:
    # the whole series is re-plotted on each call, so without clearing it
    # the lines would pile up frame after frame.
    ax0.cla()
    ax1.cla()
    ax2.cla()
    ax3.cla()
    ax4.cla()

    query = ('SELECT * FROM means')
    data = pd.read_sql_query(query, connection)
    x_random_mean = data.means
    ax0.hist(x_random_mean, bins=6)

    query2 = ('SELECT * FROM shap_p_values_text')
    data2 = pd.read_sql_query(query2, connection)
    x2 = data2.p_value.values
    # Only the most recent p-value is shown as text.
    ax1.text(0.01, 0.5, f"P values is: {x2[-1]}")
    ax1.axes.get_yaxis().set_visible(False)
    ax1.axis('off')

    sm.ProbPlot(x_random_mean).qqplot(line='s', ax=ax2)

    ax3.plot(data2.Id, x2)

    query4 = ('SELECT * FROM main_data')
    data4 = pd.read_sql_query(query4, connection)
    data41 = data4.main_value.values
    ax4.hist(data41, bins=6)
コード例 #14
0
def qq_plot(std_residuals, ax=None):
    """
    Plot quantiles of the normalized residuals.

    The Q–Q plot is used in regression under the assumption that the
    errors are normally distributed.  This is the basic assumption for
    linear regression: if the normalized residuals do not follow a normal
    distribution, the interpretation may be affected and the inference
    drawn from the model may be weakened.

    The Q–Q plot depicts the standardized residuals (z-scores) against
    theoretical quantiles of the normal distribution.  Ideally, the
    points should all lie near the 1:1 line (the diagonal line,
    intercept 0 and slope 1).  If the pattern is S-shaped,
    banana-shaped, or too far off the diagonal line, you may need to fit
    a different model to the data.

    The three residuals with the largest absolute value are annotated
    with their position in ``std_residuals``, each label placed at the
    plotted point of that residual.

    Parameters
    ----------
    std_residuals : vector
        Vector of the standardized residuals (z-scores).

    ax : matplotlib, optional
        Plot into this axis; otherwise the first axis of the figure
        returned by ``ProbPlot.qqplot`` is used.

    See Also
    --------
    std_residual_hist : histogram of normalized residuals.

    References
    ----------
    Crawley (2007) The R Book (1st ed.). Wiley Publishing.

    James, Witten, Hastie & Tibshirani (2014) An Introduction to
    Statistical Learning: With Applications in R. Springer Publishing
    Company, Incorporated.
    """
    qq = sm.ProbPlot(std_residuals)
    fig = qq.qqplot(ax=ax,
                    line='45',
                    alpha=0.7,
                    color='#4C72B0',
                    lw=1,
                    markersize=3)
    # Compare against None explicitly instead of relying on truthiness.
    if ax is None:
        ax = fig.axes[0]
    ax.set_title('Normal Q–Q')
    ax.set_xlabel('Theoretical quantiles')
    ax.set_ylabel('Standardized residuals')

    # Annotations
    values = np.asarray(std_residuals)
    # Position of every observation within the sorted sample.  The plot
    # pairs the k-th smallest residual with the k-th theoretical quantile,
    # so this gives the x-coordinate of each observation's point, for
    # negative residuals as well as positive ones.
    sorted_position = np.argsort(np.argsort(values))
    top_3 = np.flip(np.argsort(np.abs(values)), 0)[:3]
    for i in top_3:
        x = qq.theoretical_quantiles[sorted_position[i]]
        y = values[i]
        ax.text(x, y, i, size=8)
コード例 #15
0
def test_invalid_dist_config(close_figures):
    """``ProbPlot`` must raise ``TypeError`` for the ``distargs`` used here."""
    # GH 4226
    np.random.seed(5)
    longley = sm.datasets.longley.load(as_pandas=False)
    longley.exog = sm.add_constant(longley.exog, prepend=False)
    fitted = sm.OLS(longley.endog, longley.exog).fit()

    # The error message is expected to spell out the attempted dist call.
    expected_message = r'dist\(0, 1, 4, loc=0, scale=1\)'
    with pytest.raises(TypeError, match=expected_message):
        sm.ProbPlot(fitted.resid, stats.t, distargs=(0, 1, 4))
コード例 #16
0
 def setup(self):
     """Build the Longley OLS fixtures, then run the parent ``setup``.

     Seeds numpy's global random state, fits an OLS model on the Longley
     data, and stores a ``ProbPlot`` of the residuals against ``stats.t``
     (``distargs=(4,)``) together with the line option ``'r'``.
     """
     np.random.seed(5)

     longley = sm.datasets.longley.load(as_pandas=False)
     # The constant column is appended rather than prepended.
     longley.exog = sm.add_constant(longley.exog, prepend=False)
     self.data = longley

     fitted = sm.OLS(longley.endog, longley.exog).fit()
     self.mod_fit = fitted
     self.prbplt = sm.ProbPlot(fitted.resid, stats.t, distargs=(4,))
     self.line = 'r'

     # The parent setup runs last, after ``self.prbplt`` has been set.
     super(TestProbPlotLongely, self).setup()
コード例 #17
0
ファイル: core.py プロジェクト: jsosa/hydroutils
def plot_quant_probs(data, dist):
    """Draw a Q-Q plot and a P-P plot of ``data`` against ``dist`` side by side.

    The ``ProbPlot`` is built with ``fit=True``; both panels get a
    45-degree reference line.  Nothing is returned.
    """
    fitted = sm.ProbPlot(data, dist=dist, fit=True)
    fig = plt.figure()
    # (subplot index, plotting method): Q-Q on the left, P-P on the right.
    panels = ((1, fitted.qqplot), (2, fitted.ppplot))
    for index, draw in panels:
        panel_axis = fig.add_subplot(1, 2, index)
        draw(ax=panel_axis, line='45')
コード例 #18
0
 def setup(self):
     """Create the figure/axes when matplotlib imports, plus comparison data.

     Reads ``self.prbplt``, which must already be set, to size the random
     comparison sample.
     """
     try:
         import matplotlib.pyplot as pyplot
         figure, axes = pyplot.subplots()
         self.fig, self.ax = figure, axes
     except ImportError:
         # Without matplotlib no figure/axes attributes are set.
         pass
     sample_shape = self.prbplt.data.shape
     self.other_array = np.random.normal(size=sample_shape)
     self.other_prbplot = sm.ProbPlot(self.other_array)
コード例 #19
0
ファイル: regression.py プロジェクト: jomey/raster_compare
 def qqplot(self):
     """Save a normal Q-Q plot of the compressed ``band_filtered`` data.

     The image is written to ``qq_plot.png`` below ``self.output_path``.
     """
     fig = plt.figure(figsize=(12,8))
     values = self.data.band_filtered.compressed()
     ax = fig.gca()
     sm.ProbPlot(values).qqplot(ax=ax, line='s')

     # First line: the sample points; second line: the reference line.
     drawn = ax.get_lines()
     drawn[0].set(markersize=1)
     drawn[1].set(color='black', dashes=[4, 1])

     ax.set_title('Normal Q-Q Plot')
     target = self.output_path + '/qq_plot.png'
     plt.savefig(target)
コード例 #20
0
def error_distribution(model, clade, protein):
    '''Draw a Q-Q plot of the model residuals, save it, show it and close it.

    The image is saved as ``error_dist_test_<clade>_<protein>.png``; a
    roughly straight line suggests normally distributed errors.
    '''
    figure, axis = plt.subplots(1, 1)

    residual_plot = sm.ProbPlot(model.resid)
    residual_plot.qqplot(line='s', color='#1f77b4', ax=axis)
    axis.title.set_text('QQ Plot')

    output_name = f'error_dist_test_{clade}_{protein}.png'
    figure.savefig(output_name)
    plt.show()
    plt.close(figure)
コード例 #21
0
 def pp_plot(self):
     """
     Draw a P-P plot of ``self.column_data`` with a 45-degree reference
     line on a new subplot of ``self.figure`` and redraw the canvas.

     :return: None; the plot is drawn on ``self.ax``
     """
     self.ax = self.figure.add_subplot(111)
     # ``Axes.hold`` was deprecated in matplotlib 2.0 and removed in 3.0,
     # where drawing is always additive.  Only call it where it still
     # exists so the method works on both old and new versions.
     if hasattr(self.ax, 'hold'):
         self.ax.hold(True)
     probplot = sm.ProbPlot(self.column_data)
     probplot.ppplot(ax=self.ax, line='45')
     self.canvas.draw()
コード例 #22
0
def compute_quantile_quantile_curve(x):
    """Draw a probability plot of ``x`` on a new numbered figure.

    The figure number is kept on the module-level ``defaults`` object and is
    incremented on every call (starting from 1).  A ``ProbPlot`` of ``x``
    against ``stats.t`` is also built and printed.

    Returns the result of ``stats.probplot``.
    """
    print('getting qqplot estimate')

    # A missing counter counts as 0, so the first figure gets number 1.
    defaults.figureNumber = getattr(defaults, 'figureNumber', 0) + 1
    plt.figure(defaults.figureNumber)

    probplot_result = stats.probplot(x, plot=plt)
    print(sm.ProbPlot(x, stats.t, fit=True))
    return probplot_result
コード例 #23
0
def qq_plot(residuals):
    """Draw a normal Q-Q plot of ``residuals`` with a hand-drawn diagonal.

    The diagonal spans the x-limits the Q-Q plot ended up with, and those
    limits are restored afterwards.

    Returns the ``(fig, ax)`` pair holding the plot.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    sm.ProbPlot(residuals, fit=True).qqplot(color='#1F77B4', alpha=0.8, ax=ax)

    lower, upper = ax.get_xlim()
    diagonal = [lower, upper]
    ax.plot(diagonal, diagonal, color='black', alpha=0.6)
    # Plotting the diagonal may change the limits; put them back.
    ax.set_xlim(lower, upper)

    ax.set_title('Normal Q-Q plot for the residuals', fontsize=12)
    return fig, ax
コード例 #24
0
 def qqplotResiduals(zHat, indVar, depVar):
   """Show a Q-Q plot of the residuals ``depVar - zHat`` against ``t``.

   The ``distargs`` value is the number of entries in ``zHat`` minus the
   length of the first row of ``indVar``.
   """
   errors = depVar - zHat
   dof = np.shape(zHat)[0] - len(indVar[0])

   sm.ProbPlot(errors, t, distargs=(dof,)).qqplot()

   show()
コード例 #25
0
ファイル: regression.py プロジェクト: dlforrister/snow-aso
 def qqplot(self):
     """Save a normal Q-Q plot of the compressed elevation differences.

     The image is written to ``qq_plot.png`` below ``self.output_path``;
     title and save options come from ``self.title_opts()`` and
     ``self.output_defaults()``.
     """
     fig = plt.figure(figsize=(12, 8))
     differences = self.raster_difference.elevation.compressed()
     ax = fig.gca()
     sm.ProbPlot(differences).qqplot(ax=ax, line='s')

     # First line: the sample points; second line: the reference line.
     drawn = ax.get_lines()
     drawn[0].set(markersize=1)
     drawn[1].set(color='black', dashes=[4, 1])

     ax.set_title('Normal Q-Q Plot', **self.title_opts())
     fig.tight_layout()
     target = self.output_path + '/qq_plot.png'
     plt.savefig(target, **self.output_defaults())
コード例 #26
0
def probability_plot(col, df_origin, df_impute):
    '''
    Draw one P-P plot per column comparing the original data with the
    imputed data, laid out four plots per row.

    Input:
        col: A list of columns that need to plot
        df_origin: The original dataframe (missing values are dropped per
            column before plotting)
        df_impute: The dataframe after missing value imputation
    Output:
        None; a large figure containing the respective probability plots
        (origin vs. impute) of the required columns is created
    '''

    c = 4
    # Ceiling division, so a column count that is a multiple of four does
    # not get an extra empty row; at least one row keeps the figure height
    # positive when ``col`` is empty.
    r = max(1, -(-len(col) // c))
    fig = plt.figure(figsize=(c * 8, r * 8))
    for position, feature in enumerate(col, start=1):
        pp_origin = sm.ProbPlot(df_origin[feature].dropna(), fit=True)
        pp_impute = sm.ProbPlot(df_impute[feature], fit=True)
        ax = fig.add_subplot(r, c, position)
        pp_origin.ppplot(line="45", other=pp_impute, ax=ax)
        plt.title(f"{feature}, origin vs. impute")

    plt.tight_layout()
コード例 #27
0
ファイル: test_gofplots.py プロジェクト: zhisheng/statsmodels
def test_ProbPlot():
    """Smoke test: the three ``ProbPlot`` plotting methods run with ``line='r'``."""
    longley = sm.datasets.longley.load()
    longley.exog = sm.add_constant(longley.exog)
    fitted = sm.OLS(longley.endog, longley.exog).fit()
    res = sm.ProbPlot(fitted.resid, stats.t, distargs=(4, ))

    # basic tests modeled after example in docstring
    for method_name in ('qqplot', 'ppplot', 'probplot'):
        getattr(res, method_name)(line='r')

    plt.close('all')
コード例 #28
0
def distribution_test(dataset, data_name, **kwargs):
    """Save a Q-Q plot of a dataset against a fitted uniform distribution.

    Loads the ``'data'`` entry of the MATLAB file ``dataset``, plots it
    against ``scipy.stats.uniform`` (``fit=True``) with a 45-degree line and
    writes the titled image to ``test_result/uniform/<data_name>.png`` next
    to this module.

    :param dataset: path of the ``.mat`` file to load
    :param data_name: name used for the plot title and the image file
    :param kwargs: accepted for compatibility with callers; not used
    """
    path = os.path.dirname((os.path.abspath(__file__)))

    mat_file = scipy.io.loadmat(dataset)
    data = mat_file['data'].squeeze()

    probplot = sm.ProbPlot(data, scipy.stats.uniform, fit=True)
    probplot.qqplot(line='45')

    # The title has to be set before saving; set afterwards it never
    # reaches the image file.
    plt.title(data_name)
    plt.savefig(
        os.path.join(path, 'test_result/uniform/{}.png'.format(data_name)))
    plt.clf()
コード例 #29
0
def plot_normal_qq(model, ax):
    """Draw a normal Q-Q plot of ``model.resid_pearson`` on ``ax``.

    The points are plotted by hand from the quantiles computed by
    ``ProbPlot``, followed by a dotted diagonal that spans the combined
    range of both quantile sets.
    """
    # Use StatsModels ProbPlot to compute quantiles
    quantiles = sm.ProbPlot(model.resid_pearson)
    theoretical = quantiles.theoretical_quantiles
    observed = quantiles.sample_quantiles
    ax.plot(theoretical, observed, marker='o', markerfacecolor='none',
            ls='none')

    # Draw 45 degree dotted line
    lower = min(np.min(theoretical), np.min(observed))
    upper = max(np.max(theoretical), np.max(observed))
    diagonal = [lower, upper]
    ax.plot(diagonal, diagonal, linestyle=':', color='C0')

    ax.set_xlabel('Theoretical Quantiles')
    ax.set_ylabel('Standardized Residuals')
    ax.set_title('Normal Q-Q')
コード例 #30
0
ファイル: utils.py プロジェクト: tao-yu/cox-ingersoll-ross
def show_qqplot(k, lamda, theta, X_0, T, simulated):
    """Plot sample quantiles of ``simulated`` against ``ncx2`` quantiles.

    The ``ncx2`` parameters (degrees of freedom, non-centrality and scale)
    are derived from ``k``, ``lamda``, ``theta``, ``X_0`` and ``T``.  The
    points are drawn on the current pyplot axes together with a dashed
    diagonal, and the aspect ratio is set to equal.
    """
    decay = np.exp(-k * T)
    c = (2 * k) / ((1 - decay) * theta**2)
    dof = 4 * k * lamda / theta**2
    noncentrality = 2 * c * X_0 * decay

    quantiles = sm.ProbPlot(simulated, ncx2, distargs=(dof, noncentrality),
                            scale=1 / (2 * c))
    theoretical = quantiles.theoretical_quantiles
    observed = quantiles.sample_quantiles

    plt.plot(theoretical, observed, "bo")
    plt.title("Probability Plot")
    plt.xlabel("Theoretical quantiles")
    plt.ylabel("Sample quantiles")

    # The first and last entries are used as the extremes of each set.
    lower = min(theoretical[0], observed[0])
    upper = max(theoretical[-1], observed[-1])
    diagonal = np.linspace(lower, upper, 2)
    plt.plot(diagonal, diagonal, "k--")
    plt.gca().set_aspect("equal")