Python qqplot Exemples, statsmodels.api.qqplot Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : GBM.py Projet : jgerardsimcock/ml4t

def return_qqplot(data):
    """Draw a normal Q-Q plot of the ``'returns'`` entry of *data*.

    The plot is drawn on a new 9x5 inch figure with a standardized
    reference line (``line='s'``), a grid and labelled axes.  Nothing is
    returned and the figure is not shown; it is left as the current
    pyplot figure.

    Parameters
    ----------
    data : mapping or DataFrame
        Object that can be indexed with ``'returns'`` to obtain the sample.
    """
    # sm.qqplot opens its own figure unless it is handed an axes, which
    # would leave a separately created (9, 5) figure empty.  Create the
    # axes first and pass it in so the requested size is actually used.
    _, ax = plt.subplots(figsize=(9, 5))
    sm.qqplot(data['returns'], line='s', ax=ax)
    ax.grid(True)
    ax.set_xlabel('theoretical quantiles')
    ax.set_ylabel('sample quantiles')

Exemple #2

0

Afficher le fichier

Fichier : my_plottings_2p.py Projet : Syssy/diplom

def plot_single_peak(peak, ff = False, num_bins = 50, qq = scipy.stats.norm):
    '''Plot the histogram of one peak plus a Q-Q plot against distribution qq.

    Prefer plot_simlist when more than a single, specific peak is to be
    inspected or when a histogram overview is wanted.

    peak     -- object exposing ``times`` and ``params``; when ``ff`` is true,
                the path of a pickle file that contains such an object
    ff       -- "from file": load the peak from the pickle file ``peak``
    num_bins -- number of histogram bins
    qq       -- reference distribution for the Q-Q plot; only
                scipy.stats.norm and scipy.stats.invgauss are handled, any
                other value just prints a "not yet implemented" message
    '''
    data = peak
    # If "from file" was requested, open the file.
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    if ff:
        with open (peak, 'rb') as daten:
            data = pickle.load(daten)
    # Plot the plain histogram
    # NOTE(review): `normed` is rejected by recent matplotlib releases
    # (replaced by `density`) -- confirm the targeted matplotlib version.
    plt.hist(data.times, num_bins, normed=1, alpha=0.5 )
    plt.suptitle("params:" + str(data.params))
    # Now the Q-Q plot
    if qq == scipy.stats.invgauss:
        mu, loc, scale =  scipy.stats.invgauss.fit(data.times)
        logging.log(20, "ig-paramss, %s, %s, %s", str(mu), str(loc), str(scale))
        # Overlay the fitted density on the histogram (still the current
        # figure at this point; sm.qqplot opens a new one below).
        x = np.arange(1, 250, 0.5)
        plt.plot(x,scipy.stats.invgauss.pdf(x,mu, loc, scale))
        logging.log(20,'skew, %s', str(scipy.stats.skew(data.times)))
        sm.qqplot(np.array(data.times), qq, distargs=(mu,),  line = 'r')
        # The two titles were swapped in the original: this branch compares
        # against the inverse Gaussian distribution.
        plt.suptitle("params:" + str(data.params) + " qq-Plot mit Inverser Gauss Verteilung: ")
    elif qq == scipy.stats.norm:
        sm.qqplot(np.array(data.times), qq, line='r')
        plt.suptitle("params:" + str(data.params) + " qq-Plot mit Normalverteilung" )
    else:
        print("not yet implemented, distribution:", qq)
    plt.show()

Exemple #3

0

Afficher le fichier

Fichier : views.py Projet : sursingh/QInvest

def hist(request, sym):
    """Render a 2x3 panel of diagnostics for *sym* and return it as a PNG.

    Panels: histogram with KDE, box plot and rolling mean/std on the top
    row; KDE, Q-Q plot (fitted, regression line) and cumulative sum on the
    bottom row.

    Parameters
    ----------
    request : the incoming request object (not read by this function)
    sym : column key of the symbol to plot

    Returns
    -------
    HttpResponse with content type ``image/png`` containing the figure.
    """
    # `start` is a module-level name defined outside this function.
    data = Data(syms=[sym], start=start)

    r = data.panel.r.copy()
    r = r.dropna()

    fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(12, 7))
    try:
        ax = axes[0,0]
        ax.hist(r[sym].values, bins=30)
        r.plot(kind="kde", ax=ax,grid=True)
        r.boxplot(ax=axes[0,1],grid=True)
        r.plot(kind="kde", ax=axes[1,0],grid=True)
        sm.qqplot(r[sym], line='r', fit=True, ax=axes[1,1])

        # pandas.rolling_mean / pandas.rolling_std were removed from pandas;
        # Series.rolling(window) is the equivalent API.  The helper columns
        # are added only after the KDE/box plots so they do not show there.
        r['mean'] = r[sym].rolling(12).mean()
        r['std'] = r[sym].rolling(12).std()
        r['cum_ret'] = r[sym].cumsum()
        r[['mean', 'std']].plot(ax=axes[0,2], grid=True, rot=45)

        r[['cum_ret']].plot(ax=axes[1,2], grid=True, rot=45)

        fig.tight_layout()

        fig.set_facecolor((1,.8,.6,0))
        canvas = FigureCanvas(fig)
        response = HttpResponse(content_type='image/png')
        canvas.print_png(response)
    finally:
        # Figures created through pyplot stay registered until closed;
        # without this every request would leak one figure.
        plt.close(fig)
    return response

Exemple #4

0

Afficher le fichier

Fichier : plotFit.py Projet : tejaykodali/StatisticalLearning

def plotFit(fit):
  """Show a 2x2 grid of regression diagnostics, similar to R's plot(fit).

  Panels: residuals vs fitted values, a Q-Q plot of the residuals,
  standardized residuals vs fitted values, and an influence plot using
  Cook's distance as the criterion.  The figure is displayed with
  plt.show(); nothing is returned.

  fit -- fitted model result exposing ``resid`` and ``fittedvalues``
  """
  residuals = fit.resid
  center = residuals.mean()
  spread = residuals.std(axis=0)

  # Standardize element by element with the residual mean and std.
  standardized = residuals.apply(lambda value: (value - center) / spread)

  figure, grid = plt.subplots(2, 2, sharex='col', sharey='row')
  (resid_ax, qq_ax), (scale_ax, influence_ax) = grid

  # Top-left: raw residuals against the fitted values.
  resid_ax.scatter(fit.fittedvalues, fit.resid)
  resid_ax.set_xlabel('Fitted Values')
  resid_ax.set_ylabel('Residuals')
  resid_ax.set_title('Residuals vs Fitted')

  # Top-right: Q-Q plot of the residuals.
  sm.qqplot(fit.resid, ax=qq_ax)
  qq_ax.set_title('QQ plot')

  # Bottom-left: standardized residuals against the fitted values.
  scale_ax.scatter(fit.fittedvalues, standardized)
  scale_ax.set_xlabel('Fitted Values')
  scale_ax.set_ylabel('Standardized Residuals')
  scale_ax.set_title('Scale-Location')

  # Bottom-right: influence plot.
  sm.graphics.influence_plot(fit, ax=influence_ax, criterion="cooks")

  plt.show()

Exemple #5

0

Afficher le fichier

Fichier : plotkram.py Projet : Syssy/Pythonkram

def plot (sim_liste, histogram_separate, histogram_spec, qq_Plot, fit_qq_Plot, num_bins = 50, vergleich= scipy.stats.invgauss):
    """Plot histograms and Q-Q plots for a list of simulations.

    sim_liste          -- simulations; each exposes ``times`` and ``params``
                          (and ``length``/``number``/``mu`` where used below)
    histogram_separate -- one histogram per simulation
    histogram_spec     -- one combined, overlaid histogram of all simulations
    qq_Plot            -- per simulation, Q-Q plot against scipy.stats.norm
    fit_qq_Plot        -- per simulation, Q-Q plot against ``vergleich`` with
                          ``sim.mu`` as the distribution shape argument
    num_bins           -- number of histogram bins
    vergleich          -- comparison distribution for ``fit_qq_Plot``

    Prints progress and elapsed time and finally calls plt.show().
    Indentation is normalized to spaces (the original mixed tabs and
    spaces) and print is called with parentheses so the block parses on
    Python 2 and Python 3.
    """
    # time.clock no longer exists on current Python 3; use perf_counter
    # where available and fall back to clock on old interpreters.
    timer = getattr(time, 'perf_counter', None) or time.clock
    startzeit = timer()
    if histogram_spec:
        print("Erstelle Spektrum")
        fig, ax = plt.subplots()
        # Assumes every simulation was run under the same conditions, so the
        # first one is representative.  The original read `number` from
        # sim_liste[1], which fails for a single-element list.
        fig.suptitle("Laenge: "+str(sim_liste[0].length)+" Anz Teilchen: " +str(sim_liste[0].number))
        for sim in sim_liste:
            # NOTE(review): `normed` is rejected by recent matplotlib
            # releases (replaced by `density`) -- confirm target version.
            ax.hist(sim.times, num_bins, alpha=0.5, normed = 1, label = str(sim.params) )
        ax.legend(loc='upper right', shadow=True)

    # One window per simulation holding the selected panels side by side:
    # separate histogram / Q-Q plot vs normal / Q-Q plot vs `vergleich`.
    number_stats = sum([histogram_separate, qq_Plot, fit_qq_Plot])
    print(number_stats)
    if histogram_separate or qq_Plot or fit_qq_Plot:
        print("Erstelle separate Dinge")
        for sim in sim_liste:
            fig = plt.figure(figsize=(4*number_stats, 4))
            gs1 = gridspec.GridSpec(1, number_stats)
            ax_list = [fig.add_subplot(ss) for ss in gs1]

            akt = 0  # index of the next free panel
            fig.suptitle("ps, pm"+str(sim.params)+str(round(sim.params[0]-sim.params[1],5)), size = 15)
            if histogram_separate:
                ax_list[akt].hist(sim.times, num_bins)
                ax_list[akt].set_title("Histogramm")
                akt+=1

            if qq_Plot:
                sm.qqplot (np.array(sim.times), scipy.stats.norm,  line = 'r', ax=ax_list[akt])
                ax_list[akt].set_title("qq-Plot; norm!! Params: 0.05")
                akt+=1

            if fit_qq_Plot:
                sm.qqplot (np.array(sim.times), vergleich, distargs= (sim.mu, ),  line = 'r', ax=ax_list[akt])
                ax_list[akt].set_title("qq-Plot mit mu:" + str(sim.mu))
                akt+=1

            # Leave room at the top of the figure for the suptitle.
            gs1.tight_layout(fig, rect=[0, 0.03, 1, 0.95])
            print(timer()-startzeit)
    plt.show()
       

   
   
    '''x = np.linspace(0, 2*np.pi, 400)

Exemple #6

0

Afficher le fichier

Fichier : plotkram.py Projet : Syssy/diplom

def plot_single_histqq_ff(datei, num_bins=50):
    """Load a pickled simulation and plot histogram, fitted pdf and Q-Q plot.

    datei    -- path of a pickle file holding an object with a ``times``
                attribute
    num_bins -- number of histogram bins

    An inverse Gaussian distribution is fitted to ``times``; its density is
    drawn over the histogram and its shape parameter is used for the Q-Q
    plot.  The fitted parameters and the sample skewness are printed.
    """
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    # The file is only needed for loading, so it is closed before plotting.
    with open(datei, 'rb') as daten:
        sim = pickle.load(daten)

    # NOTE(review): `normed` is rejected by recent matplotlib releases
    # (replaced by `density`) -- confirm the targeted matplotlib version.
    plt.hist(sim.times, num_bins, normed=1, alpha=0.5 )
    x = np.arange(50000, 250000, 100)

    # Fit once and reuse the result; the original ran the fit twice.
    ig_params = scipy.stats.invgauss.fit(sim.times)
    # %-formatting keeps the output identical on Python 2 and Python 3.
    print("ig-params %s" % (ig_params,))
    mu, loc, scale = ig_params
    plt.plot(x,scipy.stats.invgauss.pdf(x,mu, loc, scale))
    print("skew %s" % (scipy.stats.skew(sim.times),))

    sm.qqplot(np.array(sim.times), scipy.stats.invgauss, distargs=(mu,),  line = 'r')

Exemple #7

0

Afficher le fichier

def ts_diagnostics(y, lags=None, title='', filename=''):
    '''
    Plot diagnostics for a time series and print an Augmented Dickey-Fuller
    test.

    The figure shows the series with its rolling mean and standard
    deviation, the ACF and PACF, a Q-Q plot and a histogram.

    y        -- time series; converted to pd.Series when it is not one
    lags     -- passed through to plot_acf / plot_pacf
    title    -- title of the time-series panel
    filename -- currently unused (the savefig call is commented out)

    Returns None.
    '''
    if not isinstance(y, pd.Series):
        y = pd.Series(y)

    # Rolling statistics over a 12-observation window.  pd.rolling_mean and
    # pd.rolling_std were removed from pandas; Series.rolling is the
    # equivalent API.
    rolling_mean = y.rolling(window=12).mean()
    rolling_std = y.rolling(window=12).std()

    fig = plt.figure(figsize=(14, 12))
    layout = (3, 2)
    ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
    acf_ax = plt.subplot2grid(layout, (1, 0))
    pacf_ax = plt.subplot2grid(layout, (1, 1))
    qq_ax = plt.subplot2grid(layout, (2, 0))
    hist_ax = plt.subplot2grid(layout, (2, 1))

    # time series plot
    y.plot(ax=ts_ax)
    rolling_mean.plot(ax=ts_ax, color='crimson', label='rolling mean')
    rolling_std.plot(ax=ts_ax, color='darkslateblue', label='rolling std')
    # plt.legend() acts on the current axes, which is the last one created
    # (the histogram panel), so attach the legend to ts_ax explicitly.
    ts_ax.legend(loc='best')
    ts_ax.set_title(title, fontsize=24)

    # acf and pacf
    smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=0.5)
    smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=0.5)

    # qq plot
    sm.qqplot(y, line='s', ax=qq_ax)
    qq_ax.set_title('QQ Plot')

    # hist plot
    y.plot(ax=hist_ax, kind='hist', bins=25)
    hist_ax.set_title('Histogram')
    plt.tight_layout()
    # plt.savefig('./img/{}.png'.format(filename))
    plt.show()

    # perform Augmented Dickey Fuller test
    print('Results of Dickey-Fuller test:')
    dftest = adfuller(y, autolag='AIC')
    dfoutput = pd.Series(
        dftest[0:4],
        index=['test statistic', 'p-value', '# of lags', '# of observations'])
    # dftest[4] maps critical-value levels to their values.
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)
    return

Exemple #8

0

Afficher le fichier

def plot_time_series(data, lags=None, title=None, filename=None):
    """
    Saves time series plot figure of the provided data in filename.

    The figure holds the series itself, its ACF and PACF (lag 0 omitted),
    a Q-Q plot and a probability plot.

    Parameters
    ==========
    data : series
        One-dimensional ndarray with axis labels (including time series).
        Anything that is not a pd.Series is converted and has its missing
        values dropped.
    lags : {int, array_like}
        An int or array of lag values, used on horizontal axis. Custom
        x ticks (every second lag) are only set when an int is given.
    title : string
        The title that will be set for the whole figure.
    filename : string
        File to save the plot result. Required; the path is lower-cased
        before saving.

    Raises
    ======
    ValueError
        If filename is None.
    """
    # Fail before doing any plotting work; the original only crashed on
    # filename.lower() after the whole figure had been rendered.
    if filename is None:
        raise ValueError("filename is required to save the plot")

    if not isinstance(data, pd.Series):
        data = pd.Series(data).dropna()

    with plt.style.context('bmh'):
        fig = plt.figure(figsize=(10, 8))
        layout = (3, 2)
        ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
        acf_ax = plt.subplot2grid(layout, (1, 0))
        pacf_ax = plt.subplot2grid(layout, (1, 1))
        qq_ax = plt.subplot2grid(layout, (2, 0))
        pp_ax = plt.subplot2grid(layout, (2, 1))

        data.plot(ax=ts_ax)
        ts_ax.set_title(title if title else 'Time Series Analysis Plots')
        smt.graphics.plot_acf(data,
                              lags=lags,
                              ax=acf_ax,
                              alpha=0.5,
                              zero=False)
        smt.graphics.plot_pacf(data,
                               lags=lags,
                               ax=pacf_ax,
                               alpha=0.5,
                               zero=False)
        sm.qqplot(data, line='s', ax=qq_ax)
        qq_ax.set_title('QQ Plot')
        scs.probplot(data, sparams=(data.mean(), data.std()), plot=pp_ax)
        # `lags + 1` is only meaningful for an integer; with the default
        # None (or an array of lags) keep the automatic ticks.
        if isinstance(lags, (int, np.integer)):
            ticks = np.arange(1, lags + 1, 2.0)
            plt.sca(acf_ax)
            plt.xticks(ticks)
            plt.sca(pacf_ax)
            plt.xticks(ticks)
        plt.tight_layout()

    fig.savefig(filename.lower())
    # Close this specific figure rather than whatever is current.
    plt.close(fig)

Exemple #9

0

Afficher le fichier

def qqplot(dataFrame, columns):
    '''Draw Q-Q plots for the given columns, paged over several figures.

    Each figure holds up to ``cell_size`` plots laid out with
    ``fig.add_subplot(col_size, row_size, ...)``; these three names are
    module-level settings defined outside this function.  Every completed
    page is saved as ./output/qqplot<N>.png (N starting at 1) and shown.
    '''
    page = 0
    for position, name in enumerate(columns):
        slot = position % cell_size
        # A new page starts whenever the slot index wraps around.
        if slot == 0:
            fig = plt.figure(figsize=(15, 15))
        panel = fig.add_subplot(col_size, row_size, slot + 1)
        sm.qqplot(dataFrame[name], ax=panel)
        panel.set_title(name)

        done = position + 1
        # Flush the page when it is full or when the columns run out.
        if done % cell_size == 0 or done == len(columns):
            page += 1
            plt.subplots_adjust(wspace=0.3, hspace=0.3)
            plt.savefig('./output/qqplot' + str(page) + '.png')
            plt.show()

Exemple #10

0

Afficher le fichier

Fichier : a2task1_803.py Projet : dcrainsailing/Asset-Pricing-Code

def residual():
    """Regress daily ETF returns on three factors and analyse the residuals.

    For each ETF code: prices are acquired, daily returns are computed and
    merged with the module-level factor data ``ff.data`` on 'Date', an OLS
    of the return on Mkt_RF, SMB and HML is fitted, a Q-Q plot of the
    residuals is shown, and the residuals are regressed on their own first
    lag.

    Returns a pair of DataFrames:
      result -- columns Code, E_Mean, E_std (residual mean and std)
      auto   -- columns Code, Alpha, P_Value (slope and p-value of the
                lag-1 regression)
    """
    code_list = [
        'SPY', 'XLB', 'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLU', 'XLV', 'XLY'
    ]

    resid = pd.DataFrame()
    resid_lag = pd.DataFrame()
    residual_mean, residual_std = [], []
    auto_alpha, auto_pvalue = [], []

    for code in code_list:
        resid_col = code + '_resids'
        lag_col = code + '_resids_lag'

        etf = ETF(code, '2010-01-01', '2019-09-14')
        etf.price_acquire()
        close = etf.data['Close']
        etf.data['ETF_Daily_return'] = (close / close.shift(1) - 1)
        # Dates are turned into YYYYMMDD strings before merging on 'Date'.
        etf.data['Date'] = etf.data['Date'].apply(
            lambda day: day.strftime("%Y%m%d"))

        merged = etf.data.merge(ff.data, how='left', on='Date')
        merged = merged.dropna(axis=0, how='any')

        model = OLS(y=merged.ETF_Daily_return,
                    x=merged[['Mkt_RF', 'SMB', 'HML']])
        resid[resid_col] = model.resids

        sm.qqplot(resid[resid_col], fit=True, line='45')
        plt.title('Normality test of daily residuals for ETF:' + code + '')
        plt.show()

        residual_mean.append(np.mean(resid[resid_col]))
        residual_std.append(np.std(resid[resid_col]))

        # Regress each residual on the previous one; rows where either
        # value is missing are dropped.
        resid_lag[lag_col] = resid[resid_col].shift(1)
        pairs = pd.concat([resid_lag[lag_col], resid[resid_col]],
                          axis=1).dropna()
        regress_result = stats.linregress(pairs.iloc[:, 0], pairs.iloc[:, 1])
        auto_alpha.append(regress_result.slope)
        auto_pvalue.append(regress_result.pvalue)

    result = pd.DataFrame({
        'Code': code_list,
        'E_Mean': residual_mean,
        'E_std': residual_std
    })
    auto = pd.DataFrame({
        'Code': code_list,
        'Alpha': auto_alpha,
        'P_Value': auto_pvalue
    })

    return result, auto

Exemple #11

0

Afficher le fichier

def plot_ic_qq(ic, theoretical_dist=stats.norm, ax=None):
    """
    Plots Spearman Rank Information Coefficient "Q-Q" plot relative to
    a theoretical distribution.

    One panel is drawn per column of ``ic``; missing values are replaced
    by 0 before plotting.

    Parameters
    ----------
    ic : pd.DataFrame
        DataFrame indexed by date, with IC for each forward return.
    theoretical_dist : scipy.stats._continuous_distns
        Continuous distribution generator. scipy.stats.norm and
        scipy.stats.t are popular options.
    ax : matplotlib.Axes, optional
        Axes upon which to plot. Must be iterable (one axes per column);
        when omitted a grid with three columns is created.

    Returns
    -------
    ax : matplotlib.Axes
        The axes that were plotted on.
    """

    ic = ic.copy()

    num_plots = len(ic.columns)

    # Three panels per row, rounded up.
    v_spaces = ((num_plots - 1) // 3) + 1

    if ax is None:
        f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6))
        ax = ax.flatten()

    if isinstance(theoretical_dist, stats.norm.__class__):
        dist_name = 'Normal'
    elif isinstance(theoretical_dist, stats.t.__class__):
        dist_name = 'T'
    else:
        dist_name = 'Theoretical'

    # DataFrame.items() replaces iteritems(), which pandas 2 removed.  The
    # loop variable gets its own name instead of shadowing the `ic`
    # parameter.
    for a, (period_num, period_ic) in zip(ax, ic.items()):
        sm.qqplot(period_ic.replace(np.nan, 0.).values,
                  theoretical_dist,
                  fit=True,
                  line='45',
                  ax=a)
        a.set(title="{} Period IC {} Dist. Q-Q".format(period_num, dist_name),
              ylabel='Observed Quantile',
              xlabel='{} Distribution Quantile'.format(dist_name))

    return ax

Exemple #12

0

Afficher le fichier

def simple_exponential_smoothing():
    """Demo: forecast a sampled series and inspect the forecast residuals.

    Samples 200 realisations, plots them with the forecasts from ``ses``,
    then computes rolling forecasts with ``ses_rolling``, prints the mean
    and standard deviation of the residuals, and shows the residual plot,
    histogram, autocorrelation and a Q-Q plot of standardised residuals.
    All helpers are defined elsewhere in the module.
    """
    N = 200
    t = 160
    alpha = 0.5
    x0 = 20

    realisations = pd.Series(sample_gaussian_process(20, 5, N), range(N))

    # Forecasts computed with time index t, plotted with the series.
    plot(realisations, ses(realisations, alpha, x0, t), alpha)

    # Rolling forecasts are the basis of the residual diagnostics.
    rolling_forecasts = ses_rolling(realisations, alpha, x0)
    errors = residuals(realisations, rolling_forecasts)
    print("E[e_t] = "+str(statistics.mean(errors)))
    print("Stdev[e_t] = "+str(statistics.stdev(errors)))

    scaled_errors = standardised_residuals(realisations, rolling_forecasts)
    residuals_plot(errors)
    residuals_histogram(scaled_errors)
    residuals_autocorrelation(errors, None)
    sm.qqplot(scaled_errors, line ='45')
    py.show()

Exemple #13

0

Afficher le fichier

Fichier : stat_func.py Projet : Nordant/plantstat

 def QQplot(self, save = False):
     '''
     Show one Q-Q plot per entry of self.array.
     Args:
         save - when equal to True, each figure is also written to
                Q-Q_plot_<label>.png (dpi 200) in the working directory.
     Return:
         None; the figures are displayed with plt.show().
     '''
     for position, values in enumerate(self.array):
         figure, axis = plt.subplots(figsize = (7, 5))
         # Labels are looked up by position in self.labels.
         label = self.labels[position]
         plt.title('Q-Q plot ({})'.format(label))
         sm.qqplot(np.array(values), line = '45', fit = True, ax = axis)
         if save == True:
             target = 'Q-Q_plot_{}.png'.format(label)
             plt.savefig(target, dpi = 200)
         plt.show()

Exemple #14

0

Afficher le fichier

Fichier : main.py Projet : Pengfei-Zhu/DataMining_Assignment

def plot_qq_checkout():
    """Save one Q-Q plot per entry of number_attribute_remove_lost_arr.

    Reads the module-level mapping ``number_attribute_remove_lost_arr``
    (key -> values) and writes ``./qq_checkout/<key>.png`` for every
    entry, creating the directory when it does not exist.
    """
    path = './qq_checkout'
    if not os.path.exists(path):
        os.mkdir(path)

    # .items() works on Python 2 and Python 3 mappings; .iteritems() does
    # not exist on Python 3 dicts.  No `global` statement is needed because
    # the name is only read.
    for k, v in number_attribute_remove_lost_arr.items():
        # sm.qqplot opens a new figure; it is closed after saving.
        sm.qqplot(np.array(v), line='r')
        plt.title(k)
        plt.grid(True)
        plt.savefig(path + '/' + k + '.png')
        plt.close()

Exemple #15

0

Afficher le fichier

Fichier : main.py Projet : LiangYang2836/DataMing

def plot_qq_checkout():
    """Save one Q-Q plot per entry of number_attribute_remove_lost_arr.

    Reads the module-level mapping ``number_attribute_remove_lost_arr``
    (key -> values) and writes ``./qq_checkout/<key>.png`` for every
    entry, creating the directory when it does not exist.
    """
    path = './qq_checkout'
    if not os.path.exists(path):
        os.mkdir(path)

    # .items() works on Python 2 and Python 3 mappings; .iteritems() does
    # not exist on Python 3 dicts.  No `global` statement is needed because
    # the name is only read.
    for k, v in number_attribute_remove_lost_arr.items():
        # sm.qqplot opens a new figure; it is closed after saving.
        sm.qqplot(np.array(v), line='r')
        plt.title(k)
        plt.grid(True)
        plt.savefig(path + '/' + k + '.png')
        plt.close()

Exemple #16

0

Afficher le fichier

Fichier : descriptive.py Projet : wangyundlut/Futures_Quant

 def regression(self):  # linear regression
     """Fit an OLS of self.rate on self.rate2 and draw residual diagnostics.

     Prints the model summary, shows a residuals-vs-fitted scatter plot,
     then draws a Q-Q plot of the Pearson residuals and a scale-location
     scatter on separate figures.  The last two figures are not shown
     here; the caller has to call plt.show().
     """
     rate1 = self.rate
     rate2 = self.rate2
     model = sm.OLS(rate1, sm.add_constant(rate2)).fit()
     print(model.summary())
     # model.fittedvalues holds the fitted values, model.resid the
     # regression residuals.
     plt.scatter(model.fittedvalues, model.resid)
     plt.show()
     # Normality: when the dependent variable is normally distributed, the
     # model residuals should follow a normal distribution with mean 0.
     # Q-Q plot
     sm.qqplot(model.resid_pearson, stats.norm, line='45')
     # Homoscedasticity.  Use a fresh figure so the scatter is not drawn on
     # top of the Q-Q axes, and take the absolute value first: the square
     # root of a negative residual is NaN and would silently drop points.
     plt.figure()
     plt.scatter(model.fittedvalues, abs(model.resid_pearson) ** 0.5)

Exemple #17

0

Afficher le fichier

def graphics(data):
    """Show a histogram, a box plot and a Q-Q plot side by side.

    The histogram is built from ``data`` as a whole; the box plot and the
    Q-Q plot (reference line ``"q"``) use ``data["Close"]``.
    """
    figure, panels = plt.subplots(nrows=1, ncols=3)
    hist_ax, box_ax, qq_ax = panels
    figure.suptitle("Graphical Analysis")

    # Left panel: histogram.
    hist_ax.hist(data, bins=20, alpha=0.8)
    hist_ax.set_title("Histogram")

    # Middle panel: box plot of the closing values.
    closing = data["Close"]
    sns.boxplot(y=closing, ax=box_ax, orient="vertical")
    box_ax.set_title("Boxplot")

    # Right panel: Q-Q plot.
    sm.qqplot(data["Close"], ax=qq_ax, line="q")
    qq_ax.set_title("Q-Q Plot against a normal distribution")

    plt.show()

Exemple #18

0

Afficher le fichier

    def qqplot(x, title='', path=None):
        """
        Q-Q plot of a sample, optionally saved to disk.

        Parameters
        ----------
        x : array_like
            Sample values; converted with np.array before plotting.
        title : str
            Suffix used in the output file name.
        path : str or None
            Directory in which ``qqplot_<title>.png`` is written. Nothing
            is saved when None.

        Returns
        -------
        None
        """
        sm.qqplot(np.array(x), line='q')
        # Identity comparison is the idiomatic (and safe) test for None.
        if path is not None:
            plt.savefig(path + '/qqplot_' + title + '.png')

Exemple #19

0

Afficher le fichier

Fichier : linear_regression.py Projet : sisayfilate/Predicting-College-Admisstions

def sklearn_ols_regression(X,y,print_coefficients=True,print_resid=False,plot_resid=False,qqplot_line='s'):
    """
    ols regression in sklearn
    print: coefficients (optional), regression metrics (optional), qqplot (optional)
    output: SKlearn LinearRegression object

    X                  -- feature table; ``X.columns`` is read when
                          print_coefficients is set
    y                  -- target values
    print_coefficients -- print feature names, coefficients and intercept
    print_resid        -- print metrics via regression_results(y, y_pred)
    plot_resid         -- show a Q-Q plot of the residuals
    qqplot_line        -- ``line`` option passed to sm.qqplot
    """
    # initialize a linear regression model in sklearn
    linrig = LinearRegression()
    # fit linear model to training data
    linrig.fit(X, y)
    y_pred = linrig.predict(X)

    if print_coefficients:
        print('Features: ', list(X.columns))
        print('Coefficients: ', linrig.coef_)
        print('y-intercept: ', np.round(linrig.intercept_,3))
        print('\n')

    if print_resid:
        regression_results(y, y_pred)
    if plot_resid:
        # The original wrapped |y - y_pred| in a one-element list, which
        # produced a Series holding a single array object (hence its
        # "NOT WORKING" message), and the absolute value folded the
        # distribution in half.  A Q-Q plot needs the signed residuals as a
        # flat 1-D sample.
        sk_res = np.ravel(np.asarray(y) - np.asarray(y_pred))
        sm.qqplot(sk_res,line=qqplot_line)
        plt.show()
    return linrig

Exemple #20

0

Afficher le fichier

def check_residuals(resids, **plot_args):
    """Visual residual diagnostics on a 2x2 grid.

    Args:
        resids: residuals, np.array or pd.Series
        **plot_args: forwarded to plt.subplots when building the figure

    Returns:
        the figure object returned by sm.qqplot for the Q-Q panel
    """
    fig, grid = plt.subplots(nrows=2, ncols=2, **plot_args)
    series_ax, hist_ax, qq_ax, acf_ax = grid.flatten()

    # Residuals in observation order
    positions = range(len(resids))
    series_ax = sns.lineplot(x=positions, y=resids, ax=series_ax)
    series_ax.set(title="Residuals", xlabel="", ylabel="")

    # Histogram of the residuals with a KDE overlay
    hist_ax = sns.histplot(x=resids, kde=True, ax=hist_ax)
    hist_ax.set(title="Histogram", xlabel="", ylabel="")

    # QQ plot
    fig = sm.qqplot(resids, fit=True, line="45", ax=qq_ax)
    qq_ax.set(title="Normal QQ")

    # Autocorrelation plot
    plot_acf(resids, ax=acf_ax, title="ACF")

    plt.tight_layout()

    return fig

Exemple #21

0

Afficher le fichier

Fichier : test_gofplots.py Projet : zhuangqingbin/AutoOM

 def test_qqplot_pltkwargs(self):
     # Smoke test: extra keyword arguments for the markers are accepted
     # by sm.qqplot alongside line='r'.
     marker_style = dict(
         marker='d',
         markerfacecolor='cornflowerblue',
         markeredgecolor='white',
         alpha=0.5,
     )
     fig = sm.qqplot(self.res, line='r', **marker_style)

Exemple #22

0

Afficher le fichier

def residual_plots():

    ''' Plots the squared OLS residuals vs the predictor. Also plots a QQ
    plot of the residuals.

    Data comes from load_concatenated_data(); the model is ``y ~ x``.
    Writes squared_residuals_scatterplot.pdf and residuals_QQplot.pdf to
    the working directory and shows both figures.'''

    plt.style.use('ggplot')

    aggregated_data = load_concatenated_data()
    lm_ols = smf.ols(formula='y ~ x', data=aggregated_data).fit() # OLS fit

    #-----------------------------------------------------
    # Scatter plot of squared OLS residuals vs predictor
    #-----------------------------------------------------
    plt.scatter(aggregated_data['x'].values, lm_ols.resid**2)
    plt.xlabel('x')
    plt.ylabel('OLS squared residuals')
    plt.savefig('squared_residuals_scatterplot.pdf')
    plt.show()

    #----------------------
    ## QQ plot of residuals
    #----------------------
    fig = sm.qqplot(lm_ols.resid, line='s')
    fig.savefig('residuals_QQplot.pdf')
    # plt.show() does not take a figure; its only option is `block`.
    # Passing `fig` positionally is an error on current matplotlib.
    plt.show()

Exemple #23

0

Afficher le fichier

def shapiro(data):
    '''
    > show a Q-Q plot (45-degree reference line) of the data
    > draw a 40-bin density histogram on a new figure
    > print the result of the Shapiro-Wilk normality test
    '''

    # Q-Q plot, displayed immediately
    sample = np.array(data)
    sm.qqplot(sample, line='45')
    pylab.show()

    # density histogram on its own figure (not shown here)
    _, hist_ax = plt.subplots()
    hist_ax.hist(data, 40, density=1)

    # Shapiro-Wilk test
    outcome = stats.shapiro(data)
    print('shapiro test', outcome)

Exemple #24

0

Afficher le fichier

Fichier : Cookbook.py Projet : PavelSlaby/Python-useful-code-library

def arima_diag(resids, n_lags = 40):
    """Build a 2x2 diagnostic figure for model residuals.

    Panels: standardized residuals in order, their distribution against
    the N(0,1) density, a Q-Q plot and the ACF up to ``n_lags``.
    Residuals are standardized with NaN-aware mean and std; NaNs are
    removed for the distribution and Q-Q panels only.

    Returns the figure.
    """
    fig, grid = plt.subplots(2,2)
    (series_ax, dist_ax), (qq_ax, acf_ax) = grid

    raw = resids
    resids = (raw - np.nanmean(raw)) / np.nanstd(raw)
    finite = resids[~(np.isnan(resids))]

    # Standardized residuals in observation order
    sns.lineplot(x = np.arange(len(resids)), y = resids, ax = series_ax)
    series_ax.set_title('Standardized residuals')

    # Distribution of the residuals with the standard normal density
    half_width = 1.96 * 2
    x_lim = (-half_width, half_width)
    grid_points = np.linspace(x_lim[0], x_lim[1])
    reference_pdf = stats.norm.pdf(grid_points)
    sns.distplot(finite, norm_hist = True, hist = True, kde = True, ax = dist_ax)

    dist_ax.plot(grid_points, reference_pdf, 'g', lw= 2, label = 'N(0,1)')
    dist_ax.set_title('Distribution of standardized residuals')
    dist_ax.set_xlim(x_lim)
    dist_ax.legend()

    # Q-Q plot with a standardized reference line
    sm.qqplot(finite, line = 's', ax = qq_ax)
    qq_ax.set_title('Q-Q plot')

    # Autocorrelation of the standardized residuals
    plot_acf(resids, ax = acf_ax, lags = n_lags, alpha = 0.05)
    acf_ax.set_title('ACF plot')

    return fig

Exemple #25

0

Afficher le fichier

Fichier : Visualisation.py Projet : isaaccasm/time_series_python

def plot_QQ(model, fit=False, *args, **kwargs):
    """
    Plot the QQ plot of the model residuals together with the line y=x.
    :param model: The statsmodels model; its ``resid`` attribute is plotted.
    :param fit: Passed to qqplot as ``fit`` unless ``fit`` is also given in
                kwargs. A straight line different from y=x usually means the
                distribution family is the same but the parameters are
                different, for instance a Gaussian with a different mean or
                sigma.
    :param args: Extra positional parameters for the qqplot method from
                statsmodels, placed right after the data (the first one is
                the distribution, e.g. scipy.stats.t). Note that the second
                positional argument of plot_QQ itself binds to ``fit``.
                Check: http://www.statsmodels.org/dev/generated/statsmodels.graphics.gofplots.qqplot.html
    :param kwargs: Other parameters for qqplot. ``line`` defaults to 'r'
                (regression line) and can be overridden here.
    :return: None
    """
    res = model.resid  # residuals
    xmin = np.min(res)
    xmax = np.max(res)

    kwargs.setdefault('fit', fit)
    # The original hard-coded line='r' in the call, so passing `line` in
    # kwargs raised "got multiple values for keyword argument 'line'".
    kwargs.setdefault('line', 'r')

    sm.qqplot(res, *args, **kwargs)
    # Reference line y = x over the range of the residuals.
    plt.plot([xmin,xmax],[xmin,xmax], 'r')
    plt.show()

Exemple #26

0

Afficher le fichier

def create_qq_subplots(data, variables):
    """Draw one Q-Q plot per variable on a fixed 3x3 grid.

    data      -- table indexable by the names in ``variables``
    variables -- sequence of column names; panel i shows variables[i]

    Returns the pyplot module so the caller can show or save the figure.
    """
    fig, grid = plt.subplots(nrows=3, ncols=3, figsize=(20, 15))
    panels = grid.flatten()
    marker_style = dict(marker='o',
                        markerfacecolor='none',
                        markeredgecolor='k',
                        alpha=0.5)
    for index, col_name in enumerate(variables):
        panel = panels[index]
        # Tick labels are resized before the plot is drawn.
        for tick_label in panel.get_xticklabels() + panel.get_yticklabels():
            tick_label.set_fontsize(12)
        sm.qqplot(data[col_name], ax=panel, **marker_style)
        panel.set_ylabel(col_name, fontsize=18)
        panel.set_xlabel("Theoretical Quantiles", fontsize=14)
    return plt

Exemple #27

0

Afficher le fichier

def statistic_plot(log_returns,stock_set):
    """Print statistics and draw a Q-Q plot for each symbol.

    log_returns -- table indexable by symbol; missing values are dropped
    stock_set   -- iterable of symbols

    For each symbol a header is printed, stc.print_statistics is called on
    the values, and a Q-Q plot with a standardized line is drawn.  The
    figures are not shown here.
    """
    for sym in stock_set:
        print("\nResults for symbol %s" % sym)
        print(30 * "-")
        cleaned = log_returns[sym].dropna()
        stc.print_statistics(np.array(cleaned))

        # Check the data with a quantile-quantile plot of the returns.
        sm.qqplot(cleaned, line='s')
        plt.title(sym+'qqplot')
        plt.grid(True)
        plt.xlabel('theoretical quantiles')
        plt.ylabel('sample quantiles')

Exemple #28

0

Afficher le fichier

def qq_plot(depend,features, df):
    """Q-Q plots of OLS residuals, one single-feature model per panel.

    depend   -- name of the dependent variable
    features -- feature names; the first seven are used, in order
    df       -- data passed (as a copy) to smf.ols

    The panels fill a 4x2 grid row by row; the last cell stays empty.
    Returns None.
    """
    df_copy = df.copy()
    fig, ax = plt.subplots(4,2, figsize=(30,30))
    # Every grid cell except the bottom-right one, in row-major order.
    cells = [(m, n) for m in range(4) for n in range(2)
             if not (m == 3 and n == 1)]
    for i, (m, n) in enumerate(cells):
        formula = '{}~{}'.format(depend, features[i])
        fitted = smf.ols(formula=formula, data=df_copy).fit()
        sm.qqplot(fitted.resid, dist=sp.stats.norm, line='45', fit=True, ax=ax[m][n])
        ax[m][n].set_title('{}'.format(features[i]))
    return

Exemple #29

0

Afficher le fichier

Fichier : views.py Projet : SigmaCount/sigmacount

    def RunEstimation(self,request,tsmodelid,tsworkspaceid):
        # Estimate the configured model, store the results, write three
        # diagnostic images under files/<tsmodelid>/<tsworkspaceid>/ and
        # render the summary page.
        self.data=pandas.DataFrame()
        self.prepdata(tsmodelid)
        print(self.data)
        # Identity transform and its inverse.
        inverse_transform=lambda x:x
        transform=lambda x:x

        tsmodel=modeler.ModelClass(data=self.data,startdate=self.startdate,enddate=self.enddate, dependent=self.depVar,exogenous=self.indepVar ,transform=transform,inverstransform=inverse_transform)
        tsmodel.setmodel(AR=int(self.AR),I=int(self.I),MA=int(self.MA))

        tsmodel.estimate()
        self.fit=tsmodel.fit
        print(tsmodel.fit.summary())
        confint0=self.fit.conf_int()[0]
        confint1=self.fit.conf_int()[1]

        self.SaveValues(tsmodelid,tsworkspaceid,tsmodel.fit)

        # All images go to the same per-model/per-workspace directory.
        out_dir='files/%s/%s' % (tsmodelid, tsworkspaceid)

        # Q-Q plot of the residuals
        sm.qqplot(tsmodel.fit.resid)
        plt.savefig(out_dir + '/qqplot_resid.png')
        plt.clf()

        # In-sample data: residuals and fitted values next to the inputs
        pdframe=pandas.DataFrame()
        self.data['resid']=tsmodel.fit.resid
        self.data['%s_%s' % (self.depVar[0], 'hat')]=self.fit.fittedvalues

        # Residuals over their index
        print(tsmodel.fit.resid.index)
        print(tsmodel.fit.resid.values)
        plt.plot(tsmodel.fit.resid.index,tsmodel.fit.resid.values)
        plt.savefig(out_dir + '/resid.png')
        plt.clf()

        # Fitted values and the dependent variable on one plot
        plt.plot(tsmodel.fit.fittedvalues.index,tsmodel.fit.fittedvalues.values)
        plt.plot(self.data[self.depVar[0]].index,self.data[self.depVar[0]].values)
        plt.savefig(out_dir + '/insample.png')
        plt.clf()

        context={'fit': self.fit,
                 'confint0':confint0,
                 'confint1':confint1,
                 'tsmodelid':tsmodelid,
                 'tsworkspaceid':tsworkspaceid}
        return render(request,'tsbuild/arimaSummary.html', context)

Exemple #30

0

Afficher le fichier

Fichier : plotting.py Projet : femtotrader/alphalens

def plot_ic_qq(ic, theoretical_dist=stats.norm, ax=None):
    """
    Plots Spearman Rank Information Coefficient "Q-Q" plot relative to
    a theoretical distribution.

    One panel is drawn per column of ``ic``; missing values are replaced
    by 0 before plotting.

    Parameters
    ----------
    ic : pd.DataFrame
        DataFrame indexed by date, with IC for each forward return.
    theoretical_dist : scipy.stats._continuous_distns
        Continuous distribution generator. scipy.stats.norm and
        scipy.stats.t are popular options.
    ax : matplotlib.Axes, optional
        Axes upon which to plot. Must be iterable (one axes per column);
        when omitted a grid with three columns is created.

    Returns
    -------
    ax : matplotlib.Axes
        The axes that were plotted on.
    """

    ic = ic.copy()

    num_plots = len(ic.columns)

    # Three panels per row, rounded up.
    v_spaces = ((num_plots - 1) // 3) + 1

    if ax is None:
        f, ax = plt.subplots(v_spaces, 3, figsize=(18, v_spaces * 6))
        ax = ax.flatten()

    if isinstance(theoretical_dist, stats.norm.__class__):
        dist_name = 'Normal'
    elif isinstance(theoretical_dist, stats.t.__class__):
        dist_name = 'T'
    else:
        dist_name = 'Theoretical'

    # DataFrame.items() replaces iteritems(), which pandas 2 removed.  The
    # loop variable gets its own name instead of shadowing the `ic`
    # parameter.
    for a, (period_num, period_ic) in zip(ax, ic.items()):
        sm.qqplot(period_ic.replace(np.nan, 0.).values, theoretical_dist,
                  fit=True, line='45', ax=a)
        a.set(title="{} Period IC {} Dist. Q-Q".format(
              period_num, dist_name),
              ylabel='Observed Quantile',
              xlabel='{} Distribution Quantile'.format(dist_name))

    return ax

Exemple #31

0

Afficher le fichier

Fichier : result_analysis_functions.py Projet : charles19920528/Conditional_Independence_Test

def bootstrap_qqplot(data_directory_name: str, scenario: str,
                     result_dict_name: str):
    """
    Load a pickled bootstrap result dictionary and save two Q-Q plots of
    p-values against the uniform distribution.

    :param data_directory_name: sub-directory used in both the result path
        and the plot paths
    :param scenario: scenario part of the pickle file name
    :param result_dict_name: result-dict part of the pickle and plot names
    :return: None
    """
    pickle_path = (
        f'results/result_dict/{data_directory_name}/bootstrap_refit_reduced_{result_dict_name}_{scenario}_'
        f'result_dict.p')
    with open(pickle_path, 'rb') as fp:
        bootstrap_result_dict = pickle.load(fp)

    # One inner list per sample size, one entry per trial.
    train_p_value_vet = [[
        bootstrap_result_dict[sample_size][trial_index]["train_p_value"]
        for trial_index in bootstrap_result_dict[sample_size].keys()
    ] for sample_size in bootstrap_result_dict.keys()]
    test_p_value_vet = [[
        bootstrap_result_dict[sample_size][trial_index]["test_p_value"]
        for trial_index in bootstrap_result_dict[sample_size].keys()
    ] for sample_size in bootstrap_result_dict.keys()]

    plt.scatter(train_p_value_vet[1], test_p_value_vet[1])
    # NOTE(review): the figure titled "Train" and saved as *_train.png is
    # built from test p-values (first sample size) -- confirm whether the
    # train p-values were intended here.
    fig_1 = sm.qqplot(data=np.array(test_p_value_vet[0]),
                      dist=dist.uniform,
                      line="45")
    plt.title("Train")
    fig_2 = sm.qqplot(data=np.array(test_p_value_vet[1]),
                      dist=dist.uniform,
                      line="45")
    plt.title("Test")

    plot_prefix = f"results/plots/{data_directory_name}/bootstrap_refit_reduced_{result_dict_name}"
    fig_1.savefig(f"{plot_prefix}_train.png")
    fig_2.savefig(f"{plot_prefix}_test.png")

Exemple #32

0

Afficher le fichier

Fichier : addmet.py Projet : nmineev/Diploma

def tsplot(y, lags=None, figsize=(10, 8), style='bmh', max_lag=10):
    """Draw a five-panel diagnostic figure for a series and return lag arrays.

    The figure contains the series itself, its ACF and PACF plots, a normal
    Q-Q plot and a probability plot. Panel titles carry the Dickey-Fuller
    p-value, the Q-statistic p-value at ``max_lag``, the Jarque-Bera p-value
    and the kurtosis.

    :param y: the series; anything that is not a ``pd.Series`` is wrapped.
    :param lags: forwarded to ``plot_acf`` / ``plot_pacf``.
    :param figsize: figure size handed to ``plt.figure``.
    :param style: matplotlib style context used while drawing.
    :param max_lag: highest lag considered when computing ACF/PACF values.
    :return: ``(ARord, MAord)`` -- arrays of the lags in ``0..max_lag`` whose
        absolute ACF (resp. PACF) value exceeds ``2 / sqrt(len(y))``.
    """
    series = y if isinstance(y, pd.Series) else pd.Series(y)

    with plt.style.context(style):
        plt.figure(figsize=figsize)
        grid = (3, 2)
        ts_ax = plt.subplot2grid(grid, (0, 0), colspan=2)
        acf_ax = plt.subplot2grid(grid, (1, 0))
        pacf_ax = plt.subplot2grid(grid, (1, 1))
        qq_ax = plt.subplot2grid(grid, (2, 0))
        pp_ax = plt.subplot2grid(grid, (2, 1))

        # Test statistics shown in the panel titles.
        dful_pvalue = np.around(smt.stattools.adfuller(series)[1], 3)
        acf_out = smt.stattools.acf(series, nlags=max_lag, qstat=True)
        ARord = np.array([
            lag for lag in range(0, max_lag + 1)
            if abs(acf_out[0][lag]) > 2 / np.sqrt(series.shape[0])
        ])
        pacf_values = smt.stattools.pacf(series, nlags=max_lag)
        MAord = np.array([
            lag for lag in range(0, max_lag + 1)
            if abs(pacf_values[lag]) > 2 / np.sqrt(series.shape[0])
        ])
        Qstat_pvalue = np.around(acf_out[2][max_lag - 1], 3)
        jb_result = sm.stats.stattools.jarque_bera(series)
        jb_pvalue = np.around(jb_result[1], 3)
        kurtosis = np.around(jb_result[3], 3)

        # Each title is set after its plot call so it is not overwritten.
        series.plot(ax=ts_ax)
        ts_ax.set_title(
            'Time Series Analysis Plots\nDickey-Fuller Test: {}'.format(
                dful_pvalue))

        smt.graphics.plot_acf(series, lags=lags, ax=acf_ax, alpha=0.5)
        acf_ax.set_title(
            "Autocorrelation\nQ({}): {}\nLast Singf Lag: {}".format(
                max_lag, Qstat_pvalue, max(ARord)))

        smt.graphics.plot_pacf(series, lags=lags, ax=pacf_ax, alpha=0.5)
        pacf_ax.set_title("Partial Autocorrelation\nLast Singf Lag: {}".format(
            max(MAord)))

        sm.qqplot(series, line='s', ax=qq_ax)
        qq_ax.set_title('QQ Plot\nJarque-Bera Test: {}\nKurtosis: {}'.format(
            jb_pvalue, kurtosis))

        scs.probplot(series, sparams=(series.mean(), series.std()), plot=pp_ax)

        plt.tight_layout()
    plt.show()
    return ARord, MAord

Exemple #33

0

Afficher le fichier

def qq_plot(diffs_mean, recall, rt=False):
    """Save a row of three normal Q-Q plots, one per column of ``diffs_mean``.

    The panels are titled 'Rest', 'Video' and 'Game'. A frameless axes laid
    over the whole figure carries the shared axis labels. The output file
    name is ``qq-plot`` followed by ``recall`` or ``recog`` and, when ``rt``
    is true, ``_rt``.

    :param diffs_mean: object indexed with ``.iloc[:, i]`` for i in 0..2.
    :param recall: truthy selects the 'recall' tag, falsy selects 'recog'.
    :param rt: when truthy, '_rt' is appended to the tag.
    """
    sns.set_style('white')

    t = ('recall' if recall else 'recog') + ('_rt' if rt else '')

    fig2, axs = plt.subplots(1, 3, sharex=False)

    # Invisible full-figure axes: it becomes the current axes, so the
    # pyplot label calls below label the figure as a whole.
    fig2.add_subplot(111, frameon=False)
    plt.tick_params(labelcolor='none',
                    top=False,
                    bottom=False,
                    left=False,
                    right=False)
    plt.xlabel("Theoretical Quantiles")
    plt.ylabel("Sample Quantiles")

    title = ['Rest', 'Video', 'Game']
    for i, (panel, panel_title) in enumerate(zip(axs.ravel(), title)):
        sm.qqplot(np.array(diffs_mean.iloc[:, i]), line='s', ax=panel)

        # Line 0 holds the sample markers, line 1 the reference line.
        markers = panel.get_lines()[0]
        markers.set_markersize(5)
        markers.set_markeredgewidth(0.3)
        markers.set_markerfacecolor(colors[i])
        markers.set_markeredgecolor('gray')
        panel.get_lines()[1].set_color('gray')

        # Per-panel labels are blanked in favour of the shared ones.
        panel.set_xlabel('')
        panel.set_ylabel('')
        panel.set_title(panel_title)
        panel.set_xlim(-2, 2)

    plt.tight_layout()
    plt.savefig(F'qq-plot{t}', dpi=300)

Exemple #34

0

Afficher le fichier

Fichier : statstest.py Projet : zennpng/avocado_supply

def qqplotbags(dataframe, bagType, method, regionList):
    """Save and show one normal Q-Q plot of ``bagType`` per region.

    For every region in ``regionList`` the rows matching that region and the
    given ``method`` (compared against the 'type' column) are selected, the
    ``bagType`` values are sorted and plotted, and the figure is saved under
    ``Data Analysis/BAGS/QQplotsbags/<method><bagType>/``.

    :param dataframe: frame with 'region', 'type' and ``bagType`` columns.
    :param bagType: name of the column to plot.
    :param method: value the 'type' column must equal.
    :param regionList: iterable of region names.
    """
    directory = "Data Analysis/BAGS/QQplotsbags/"
    target_dir = directory + method + bagType + "/"

    for reg in regionList:
        selected = (dataframe['region'] == reg) & (dataframe['type'] == method)
        values = sorted(dataframe.loc[selected][bagType].tolist())
        label = reg + " " + method + " " + bagType

        sm.qqplot(pd.DataFrame(values), line='s', alpha=0.3)
        plt.title(label)

        # Created lazily, so nothing is made when regionList is empty.
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)

        plt.savefig(target_dir + label)
        plt.show()

Exemple #35

0

Afficher le fichier

Fichier : shapiro_wilk.py Projet : mwytock0812/ny_subway

def q_q_plot(filepath, parameter):
    """Show a Q-Q plot of one CSV column against a fitted Student t distribution.

    :param filepath: path of the CSV file to read.
    :param parameter: name of the column to plot.

    Failures while plotting are reported with a short message instead of
    being raised. Errors from reading the file or selecting the column
    still propagate.
    """
    df = pandas.read_csv(filepath)
    array = df[parameter]
    try:
        sm.qqplot(array, scipy.stats.t, fit=True, line='45')
        plt.show()
    # `Exception` rather than a bare except, so KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    except Exception:
        # Call form is valid in both Python 2 and Python 3.
        print("There was an error.")

Exemple #36

0

Afficher le fichier

Fichier : fourinone_residual.py Projet : CJRockball/Residual-Overview

def fourinone(res, y_pred2, x):
    """Show a 2x2 residual summary figure.

    Panels: normal Q-Q plot of the residuals, residuals against fitted
    values, histogram of the residuals (12 bins), and residuals against
    ``x`` (labelled as observation order).

    :param res: residuals.
    :param y_pred2: fitted values; must provide ``min()`` and ``max()``.
    :param x: x values for the last panel; must provide ``min()`` and ``max()``.
    """
    fig = plt.figure()
    fig.suptitle('Residual Summary', fontsize=16)
    fig.set_facecolor('tan')

    zero_level = np.array([0, 0])  # y values of the dashed zero line

    # Panel 1: Q-Q plot. The new subplot is the current axes, so plt.title
    # applies to it.
    qq_panel = fig.add_subplot(2, 2, 1)
    sm.qqplot(res, line='s', ax=qq_panel)
    plt.title('QQ plot')

    # Panel 2: residuals against fitted values, with a dashed zero line.
    fitted_panel = fig.add_subplot(2, 2, 2)
    fitted_span = np.array([y_pred2.min(), y_pred2.max()])
    fitted_panel.plot(fitted_span, zero_level, 'k--')
    fitted_panel.plot(y_pred2, res, 'o', label="data")
    fitted_panel.set_ylabel('Residual')
    fitted_panel.set_xlabel('Fitted Value')
    fitted_panel.set_title('Residual vs Fitted Value')

    # Panel 3: histogram, drawn through pyplot on the current axes.
    hist_panel = fig.add_subplot(2, 2, 3)
    plt.hist(res, 12, edgecolor="k", alpha=1)
    hist_panel.set_ylabel('Frequency')
    hist_panel.set_xlabel('Residual')
    hist_panel.set_title('Histogram')

    # Panel 4: residuals against x, with a dashed zero line.
    order_panel = fig.add_subplot(2, 2, 4)
    order_span = np.array([x.min(), x.max()])
    order_panel.plot(order_span, zero_level, 'k--')
    order_panel.plot(x, res, '-o', label="data")
    order_panel.set_ylabel('Residual')
    order_panel.set_xlabel('Observation Order')
    order_panel.set_title('Residual vs Observation Order')

    fig.tight_layout()
    fig.show()

Exemple #37

0

Afficher le fichier

Fichier : taseries.py Projet : maoty2011/springboard-capstone1

    def target_dist(self, bins=10, dist=stats.norm):
        """Show a histogram of ``self.target`` followed by its Q-Q plot.

        :param bins: number of histogram bins.
        :param dist: reference distribution for the Q-Q plot; it is passed
            with ``fit=True`` and a 45-degree reference line.
        """
        # Histogram of the target variable.
        self.target.hist(bins=bins)
        plt.show()

        # Q-Q plot against `dist` (e.g. stats.beta could be passed instead).
        sm.qqplot(self.target, dist, line='45', fit=True)
        plt.show()

Exemple #38

0

Afficher le fichier

Fichier : prediction.py Projet : NTAWolf/mscpublic

def plot_model(prediction, y, x):
    """Draw a 2x2 diagnostic figure for a set of predictions.

    Panels: histogram of the residuals ``y - prediction``, their Q-Q plot,
    a 2-D histogram of residuals against predictions, and a 2-D histogram
    of true values against predictions.

    :param prediction: predicted values.
    :param y: true values; its ``index`` is reused for the residual series.
    :param x: not used by this function.
    """
    fig, axs = sns.plt.subplots(2, 2, figsize=(16, 10))
    hist_ax, qq_ax, resid_ax, truth_ax = axs.flatten()

    resid = pd.Series(y - prediction, index=y.index, name='Residuals')

    resid.hist(bins=40, ax=hist_ax)
    hist_ax.set_xlabel('Residuals')

    sm.qqplot(resid, line='q', ax=qq_ax)
    qq_ax.set_xlabel('Residuals')

    tbpd.hist2d(resid, prediction, ax=resid_ax,
                vlabel='Residuals', hlabel='Predicted value',
                integer_aligned_bins=True)
    tbpd.hist2d(y, prediction, ax=truth_ax,
                vlabel='True value', hlabel='Predicted value',
                integer_aligned_bins=True, sqrt=True)

    fig.tight_layout()

Exemple #39

0

Afficher le fichier

Fichier : stock_assginment2_utils.py Projet : zsb8/Algo-ETL

def hyp_test_pic2(symbol, from_t, to_t):
    """
    Draw a normal Q-Q plot of log returns (normality check, method two).

    The x axis shows theoretical quantiles and the y axis sample quantiles.
    Points that do not lie on a straight line indicate that the data is not
    normally distributed.
    :param symbol: str
    :param from_t: str
    :param to_t: str
    :return: whatever ``plt.show()`` returns
    """
    # NOTE(review): the arguments are interpolated directly into the SQL
    # text. If they can come from untrusted input this should become a
    # parameterized query -- depends on what query_dt supports.
    sql = f"select * from stock_candles_day where symbol='{symbol}' and dt>='{from_t}' and dt<='{to_t}' order by symbol,series"
    candles = query_dt(sql)

    # Log return: log(1 + percentage change) of column 'c'
    # (presumably the closing price -- verify against the table schema).
    log_returns = np.log(candles['c'].pct_change() + 1)

    _, axes = plt.subplots(1, 1, figsize=(10, 12))
    sm.qqplot(log_returns.dropna(), line='s', ax=axes)
    # The original author noted that a Chinese title causes an error.
    axes.set_title("hypothesis testing")
    return plt.show()

Exemple #40

0

Afficher le fichier

def do_qqplot(data, data_type, d):
    """Save a Q-Q plot of ``data`` (45-degree reference line) as SVG, then close it.

    :param data: sample to plot.
    :param data_type: tag used in the output file name.
    :param d: value used in the ``d=`` part of the output file name.

    The file name also uses ``n``, which is not defined in this function and
    must exist in the enclosing scope.
    """
    fig = sm.qqplot(data, line='45')
    target = f"/home/vmargot/Documents/Jussieu/new/{data_type}_{n}_d={d}_qqplot"
    fig.savefig(target, format="svg", dpi=300)
    plt.close(fig)

Exemple #41

0

Afficher le fichier

Fichier : test_gofplots.py Projet : CRP/statsmodels

def test_qqplot():
    """Smoke test: qqplot runs on the OLS residuals of the Longley dataset."""
    longley = sm.datasets.longley.load()
    longley.exog = sm.add_constant(longley.exog)
    residuals = sm.OLS(longley.endog, longley.exog).fit().resid

    fig = sm.qqplot(residuals)

    # Close the figure so it does not linger after the test.
    plt.close(fig)

Exemple #42

0

Afficher le fichier

Fichier : test_gofplots.py Projet : AnaMP/statsmodels

def test_qqplot():
    """Smoke test: qqplot with a regression line runs on Longley OLS residuals."""
    longley = sm.datasets.longley.load()
    # The constant column is appended rather than prepended.
    longley.exog = sm.add_constant(longley.exog, prepend=False)
    residuals = sm.OLS(longley.endog, longley.exog).fit().resid

    sm.qqplot(residuals, line='r')

    plt.close('all')

Exemple #43

0

Afficher le fichier

Fichier : pvalue_beta_plot.py Projet : yarker/MAGeCK_Repo

def plot(file_name, negative_control_gRNAs=None, wald_only=False):
    """Plot p-value Q-Q plots and a beta histogram from a gene summary file.

    The file is tab separated with one header line. Per row, column 0 is
    the identifier matched against ``negative_control_gRNAs`` and column 2
    the beta value. With ``wald_only`` column 4 is the Wald p-value;
    otherwise column 4 is the permutation p-value and column 6 the Wald
    p-value.

    :param file_name: path containing ``".gene_summary.txt"``; the part
        before that suffix is used in the output image names.
    :param negative_control_gRNAs: optional container of identifiers whose
        rows are collected separately from the rest.
    :param wald_only: when true, no permutation p-values are read and the
        permutation Q-Q plot is skipped.
    :raises ValueError: if ``file_name`` lacks ``".gene_summary.txt"``.
    """
    import math

    def _drop_nan(values):
        # Keep only real numbers; NaN entries cannot be plotted.
        return [x for x in values if not math.isnan(x)]

    def _plottable_betas(values):
        # Betas are additionally restricted to the open interval (-3, 3).
        return [x for x in _drop_nan(values) if abs(x) < 3]

    has_controls = negative_control_gRNAs is not None

    permute_p_value_list = []
    wald_p_value_list = []
    beta_value_list = []
    negative_control_permute_p_value_list = []
    negative_control_wald_p_value_list = []
    negative_control_beta_value_list = []

    # The context manager closes the file even if a row fails to parse.
    with open(file_name, 'rb') as data:
        short_file_name = file_name[:file_name.index(".gene_summary.txt")]
        data.readline()  # skip the header line
        for line in data:
            elements = line.decode().strip().split("\t")
            if has_controls and elements[0] in negative_control_gRNAs:
                betas = negative_control_beta_value_list
                permute_ps = negative_control_permute_p_value_list
                wald_ps = negative_control_wald_p_value_list
            else:
                betas = beta_value_list
                permute_ps = permute_p_value_list
                wald_ps = wald_p_value_list

            betas.append(float(elements[2]))
            if wald_only:
                wald_ps.append(float(elements[4]))
            else:
                permute_ps.append(float(elements[4]))
                wald_ps.append(float(elements[6]))

    beta_value_list = _plottable_betas(beta_value_list)
    wald_p_value_list = _drop_nan(wald_p_value_list)
    if has_controls:
        # Filter the control lists themselves. The original overwrote them
        # with the filtered non-control lists, discarding the control data.
        negative_control_beta_value_list = _plottable_betas(
            negative_control_beta_value_list)
        negative_control_wald_p_value_list = _drop_nan(
            negative_control_wald_p_value_list)

    if not wald_only:
        permute_p_value_list = _drop_nan(permute_p_value_list)
        stats.probplot(permute_p_value_list, dist="uniform", plot=pylab)
        pylab.savefig("QQplot of permute_p value %s.png" % short_file_name)
        pylab.close()

    pylab.hist(beta_value_list, bins=1000)
    pylab.savefig("Hist of beta value %s.png" % short_file_name)
    pylab.close()

    fig = sm.qqplot(np.array(wald_p_value_list), stats.uniform, fit=True,
                    line='45')
    pylab.xlim(0, 1)
    pylab.ylim(0, 1)
    pylab.savefig("QQplot of wald_p value %s.png" % short_file_name)
    pylab.close()
    '''

Exemple #44

0

Afficher le fichier

Fichier : nimbleParser.py Projet : afrendeiro/nimblegen_parser

 def qqPlot(self):
     """Save a Q-Q plot of this sample's probe column as ``<name>_qqprob.png``."""
     import matplotlib.pyplot as plt
     import statsmodels.api as sm  # original note: pandas, patsy

     # Column 2 + self.number of the probe table.
     # Original author's note: add log2.
     signal = self.array.probes[:, 2 + self.number]

     plt.figure(self.number)
     sm.qqplot(signal)
     plt.xlabel('Theoretical quantiles')
     plt.ylabel('Sample quantiles')
     plt.title('Probe intensities for %s' % (self.name))
     plt.savefig("%s_qqprob.png" % (self.name))

Exemple #45

0

Afficher le fichier

Fichier : Forest Fires - week 3.py Projet : MColosso/Forest-Fires

def print_qqplot_and_residuals_plot(model):
    """Show a model's residual Q-Q plot next to its Pearson residuals.

    :param model: object exposing ``resid`` and ``resid_pearson``.
    """
    # First of three columns: Q-Q plot with a regression reference line.
    qq_axes = plt.subplot(1, 3, 1)
    sm.qqplot(model.resid, ax=qq_axes, line='r')

    # Second column: Pearson residuals. plt.subplot makes it the current
    # axes, so the pyplot calls below draw into it.
    plt.subplot(1, 3, 2)
    pearson_residuals = pandas.DataFrame(model.resid_pearson)
    plt.plot(pearson_residuals, 'o', ls='None')
    plt.axhline(y=0, color='r')
    plt.ylabel('Standarized Residual')
    plt.xlabel('Observation Number')

    plt.show()

Exemple #46

0

Afficher le fichier

Fichier : SFERWSimu.py Projet : QuantLet/SFE_class_2017

    def tsplot(y, lags=None, figsize=(10, 8), style='bmh'):
        """Draw series, ACF, PACF, Q-Q and probability plots on one figure.

        :param y: the series; anything that is not a ``pd.Series`` is wrapped.
        :param lags: forwarded to ``plot_acf`` / ``plot_pacf``.
        :param figsize: figure size handed to ``plt.figure``.
        :param style: matplotlib style context used while drawing.
        """
        series = y if isinstance(y, pd.Series) else pd.Series(y)

        with plt.style.context(style):
            plt.figure(figsize=figsize)

            # 3x2 grid: the series spans the top row, diagnostics fill the rest.
            grid = (3, 2)
            ts_ax = plt.subplot2grid(grid, (0, 0), colspan=2)
            acf_ax = plt.subplot2grid(grid, (1, 0))
            pacf_ax = plt.subplot2grid(grid, (1, 1))
            qq_ax = plt.subplot2grid(grid, (2, 0))
            pp_ax = plt.subplot2grid(grid, (2, 1))

            series.plot(ax=ts_ax)
            ts_ax.set_title('Time Series Analysis Plots')

            smt.graphics.plot_acf(series, lags=lags, ax=acf_ax, alpha=0.5)
            smt.graphics.plot_pacf(series, lags=lags, ax=pacf_ax, alpha=0.5)

            sm.qqplot(series, line='s', ax=qq_ax)
            qq_ax.set_title('QQ Plot')

            scs.probplot(series, sparams=(series.mean(), series.std()),
                         plot=pp_ax)

            plt.tight_layout()
        return

Exemple #47

0

Afficher le fichier

Fichier : regression_modeling.py Projet : ekolik/-Python-Analysis_of_wine_quality

def mult_regression(wine_set):
    """Fit an OLS model of wine quality and show its diagnostic plots.

    Every column is mean-centred except 'quality', which is restored to its
    original values. The formula depends on the row count: fewer than 2000
    rows uses the formula the original author marked "for red", otherwise
    the one marked "for white". The model summary is printed, then a Q-Q
    plot of the residuals, a plot of the Pearson residuals and an influence
    plot are shown.

    :param wine_set: data frame with 'quality' and the formula's predictors.
    """
    # Centre the quantitative predictors; keep the response uncentred.
    quality = wine_set['quality']
    wine_set = wine_set - wine_set.mean()
    wine_set['quality'] = quality

    print ("OLS multivariate regression model")

    # The original author first ran with all columns, then kept the most
    # significant ones for each wine set.
    if len(wine_set) < 2000:
        # for red
        chosen_formula = "quality ~ volatile_acidity + chlorides + pH + sulphates + alcohol"
    else:
        # for white
        chosen_formula = "quality ~ volatile_acidity + density + pH + sulphates + alcohol"

    results1 = smf.ols(formula=chosen_formula, data=wine_set).fit()
    print(results1.summary())

    # Q-Q plot of the residuals as a normality check.
    sm.qqplot(results1.resid, line = 'r')
    plt.show()

    # Pearson residuals with a horizontal reference line at zero.
    pearson_residuals = pd.DataFrame(results1.resid_pearson)
    plt.plot(pearson_residuals, 'o', ls = 'None')
    plt.axhline(y=0, color = 'r')
    plt.ylabel('Standardized redisual')
    plt.xlabel('Observation number')
    plt.show()

    # (The original also kept commented-out plot_regress_exog diagnostics
    # for "alcohol" and "sulphates" here.)

    # Leverage / influence plot.
    sm.graphics.influence_plot(results1, size=8)
    plt.show()

Exemple #48

0

Afficher le fichier

Fichier : multipleRegressionModel.py Projet : marlonsvl/multipeRegressionModel

# Quadratic model: life expectancy on the centred breast-cancer rate and its square.
reg2 = smf.ols('lifeexpectancy ~ breastcancerper100th_c + I(breastcancerper100th_c**2)', data=sub1).fit()
print (reg2.summary())


####################################################################################
# EVALUATING MODEL FIT
####################################################################################

# adding alcohol consumption
# NOTE(review): the formula below adds breastcancerper100th_c a second time
# rather than an alcohol-consumption variable -- confirm the intended regressor.
reg3 = smf.ols('lifeexpectancy ~ breastcancerper100th_c + I(breastcancerper100th_c**2) + breastcancerper100th_c', 
               data=sub1).fit()
print (reg3.summary())


# Q-Q plot of the reg3 residuals as a normality check
fig4=sm.qqplot(reg3.resid, line='r')

# simple plot of the Pearson residuals with a reference line at zero
stdres=pandas.DataFrame(reg3.resid_pearson)
plt.plot(stdres, 'o', ls='None')
l = plt.axhline(y=0, color='r')
plt.ylabel('Standardized Residual')
plt.xlabel('Observation Number')


# additional regression diagnostic plots for breastcancerper100th_c
fig2 = plt.figure(figsize=(12,8))
fig2 = sm.graphics.plot_regress_exog(reg3,  "breastcancerper100th_c", fig=fig2)

# leverage plot
fig3=sm.graphics.influence_plot(reg3, size=8)

Exemple #49

0

Afficher le fichier

Fichier : build_model.py Projet : SGShuman/ground_game

def plot_box_resids(fit_model, y_pred, subset=None):
    '''Plot and test the standardized residuals of a fitted model.

    Shows residuals against predictions with a fitted line, box plots per
    "turnout bucket", a Q-Q plot, a histogram with a normal density
    overlay and a histogram of the buckets. Also prints the results of the
    Shapiro-Wilk, D'Agostino-Pearson, Kolmogorov-Smirnov and
    Anderson-Darling tests.

    :param fit_model: object exposing ``resid``.
    :param y_pred: predictions; when their minimum is below -1 they are
        exponentiated before plotting and bucketing.
    :param subset: optional fraction; when truthy that fraction of the
        residuals is drawn at random without replacement.
    '''
    resid = fit_model.resid
    # Standardize with the standard deviation. The original divided by the
    # variance, which only gives unit scale when the variance is 1.
    s_resid = (resid - np.mean(resid)) / np.std(resid)
    if subset:
        # NOTE(review): the sampled residuals are joined to y_pred by
        # position below, so they may no longer line up with their own
        # predictions -- confirm this is acceptable.
        s_resid = np.random.choice(
            s_resid,
            replace=False,
            # math.floor returns a float on Python 2; size must be an int.
            size=int(math.floor(len(s_resid) * subset)))
    df = pd.DataFrame(s_resid, columns=['resids'])
    temp_df = pd.DataFrame(y_pred, columns=['target'])
    df = df.join(temp_df)

    if min(y_pred) < -1:
        df['turnout_bucket'] = df['target']\
            .apply(lambda x: int(math.floor(10 * np.exp(x))))
        y = df['target'].apply(lambda x: np.exp(x))
    else:
        df['turnout_bucket'] = df['target']\
            .apply(lambda x: int(math.floor(10 * x)))
        y = df['target']

    posit = sorted(df['turnout_bucket'].unique())

    # Residuals against prediction, with a straight-line fit (computed once).
    plt.scatter(y, s_resid, alpha=.2)
    coefficients = np.polyfit(y, s_resid, 1)
    slope = coefficients[0]
    plt.plot(y, np.poly1d(coefficients)(y))
    plt.title('Studentized Residuals vs Prediction')
    plt.xlabel('Predicted Value')
    plt.ylabel('Studentized Residual')
    print('Slope of best fit line: %s' % slope)
    plt.show()

    df[['resids', 'turnout_bucket']]\
        .boxplot(by='turnout_bucket', positions=posit, widths=.5)
    plt.title('Residuals versus Turnout')
    plt.xlabel('Turnout Bucket')
    plt.ylabel('Studentized Residuals')
    plt.suptitle('')
    plt.show()

    sm.qqplot(s_resid, line='s')
    plt.title('Q-Q Plot')
    plt.show()

    # Normality tests. The print calls use a single argument so they behave
    # the same on Python 2 and Python 3.
    _, p_val = shapiro(s_resid)
    print('Shapiro-Wilk P_val is %s, larger the better' % p_val)

    _, p_val = normaltest(s_resid)
    print('D’Agostino and Pearson’s P_val is %s, larger the better' % p_val)

    _, p_val = kstest(s_resid, 'norm')
    print('Kolmogorov–Smirnov P_val is %s, larger the better' % p_val)

    A, critical, sig = anderson(s_resid)
    print('Anderson-Darling A2 is %s, smaller the better' % A)
    print(critical)
    print(sig)

    # NOTE(review): `normed` and `mlab.normpdf` are unavailable in recent
    # matplotlib releases; left unchanged because the target matplotlib
    # version is not visible here.
    _, bins, _ = plt.hist(s_resid, 75, normed=1)
    mu = np.mean(s_resid)
    sigma = np.std(s_resid)
    plt.plot(bins, mlab.normpdf(bins, mu, sigma))
    plt.title('Residuals versus a Normal Dist')
    plt.show()

    df['turnout_bucket'].hist(bins=posit, align='left', color='b')
    plt.title('Histogram of Turnout Bucket')
    plt.ylabel('Count')
    plt.xlim(-.5, - .5 + len(posit))

    temp = df[['resids', 'turnout_bucket']].groupby('turnout_bucket').count()
    temp.columns = ['Count']
    plt.show()
    print(temp)

Exemple #50

0

Afficher le fichier

Fichier : visualizations.py Projet : smcdonald2013/hdstats-framework

 def plot(self):
     """Show a Q-Q plot of ``self.data`` with ``fit=True`` and a standardized line."""
     sm.qqplot(self.data, line='s', fit=True)
     plt.show()

Exemple #51

0

Afficher le fichier

Fichier : visualizeresids.py Projet : Quantia-Analytics/AzureML-Regression-Example

def azureml_main(BikeShare):
    """Plot residual diagnostics for scored bike-demand data.

    Sorts ``BikeShare`` in place by 'dayCount', adds a 'Resids' column
    ('Scored Label Mean' minus 'cnt') and draws: residuals against demand,
    actual and predicted demand per selected hour, box plots of residuals
    by 'hr' and 'xformWorkHr', a Q-Q plot and a histogram of the residuals.
    Figures are only written to disk when the local ``Azure`` flag is True;
    it is hard-coded to False below.

    Returns the same data frame, including the new 'Resids' column.
    """
    import matplotlib
    matplotlib.use('agg')  # Set backend
    matplotlib.rcParams.update({'font.size': 20})
    
    import matplotlib.pyplot as plt
    import statsmodels.api as sm
    
    # Switch controlling whether the figures are saved as PNG files.
    Azure = False

    ## Sort the data frame based on the dayCount
    # NOTE(review): DataFrame.sort is not available in recent pandas
    # releases -- confirm the pandas version this runs on.
    BikeShare.sort('dayCount',  axis = 0, inplace = True) 

    ## Compute the residuals (predicted minus actual count).
    BikeShare['Resids'] = BikeShare['Scored Label Mean'] - BikeShare['cnt']   
    
    ## Plot the residuals vs the label, the count of rented bikes.
    fig = plt.figure(figsize=(8, 6))
    fig.clf()
    ax = fig.gca()
    ## Plot the residuals.
    BikeShare.plot(kind = 'scatter', x = 'cnt', y = 'Resids', 
                   alpha = 0.05, color = 'red', ax = ax)              
    plt.xlabel("Bike demand")
    plt.ylabel("Residual")
    plt.title("Residuals vs demand")
    plt.show()
    if(Azure == True): fig.savefig('scatter1.png')
    

    ## Make time series plots of actual bike demand and
    ## predicted demand by times of the day.
    times = [7, 9, 12, 15, 18, 20, 22]
    for tm in times:
        fig = plt.figure(figsize=(8, 6))
        fig.clf()
        ax = fig.gca()
        # Actual demand for this hour.
        BikeShare[BikeShare.hr == tm].plot(kind = 'line', 
                                           x = 'dayCount', y = 'cnt',
                                           ax = ax)          
        # Predicted demand for this hour, drawn in red on the same axes.
        BikeShare[BikeShare.hr == tm].plot(kind = 'line', 
                                           x = 'dayCount', y = 'Scored Label Mean',
                                           color = 'red', ax = ax)                                    
        plt.xlabel("Days from start of plot")
        plt.ylabel("Count of bikes rented")
        plt.title("Bikes rented by days for hour = " + str(tm))
        plt.show()
        if(Azure == True): fig.savefig('tsplot' + str(tm) + '.png')
 
    ## Boxplots of the residuals by hour and transformed hour.
    # NOTE(review): `labels` is zipped into the loop but `lab` is never used.
    labels = ["Box plots of residuals by hour of the day \n\n",
            "Box plots of residuals by transformed hour of the day \n\n"]
    xAxes = ["hr", "xformWorkHr"]
    for lab, xaxs in zip(labels, xAxes):
        fig = plt.figure(figsize=(12, 6))
        fig.clf()
        ax = fig.gca()  
        BikeShare.boxplot(column = ['Resids'], by = [xaxs], ax = ax)   
        plt.xlabel('')
        plt.ylabel('Residuals')
        plt.show() 
        if(Azure == True): fig.savefig('boxplot' + xaxs + '.png')
     
    ## QQ Normal plot of residuals
    fig = plt.figure(figsize = (6,6))
    fig.clf()
    ax = fig.gca()
    sm.qqplot(BikeShare['Resids'], ax = ax)
    ax.set_title('QQ Normal plot of residuals')
    # The same figure is saved under two names.
    if(Azure == True): fig.savefig('QQ.png')
    if(Azure == True): fig.savefig('QQ1.png')

    ## Histograms of the residuals
    fig = plt.figure(figsize = (8,6))
    fig.clf()
    fig.clf()
    ax = fig.gca()
    # NOTE(review): Series.as_matrix is not available in recent pandas
    # releases -- confirm the pandas version this runs on.
    ax.hist(BikeShare['Resids'].as_matrix(), bins = 40)
    ax.set_xlabel("Residuals")
    ax.set_ylabel("Density")
    ax.set_title("Histogram of residuals")
    if(Azure == True): fig.savefig('hist.png')   

    return BikeShare

Exemple #52

0

Afficher le fichier

Fichier : Kaggle_DataScienceLondon.py Projet : soumil-jain/ipython-notebooks

# Box plot of column 0 of `comb`.
comb.boxplot(column=[0])


## Q-Q Plot 

##### In statistics, a Q-Q plot ("Q" stands for quantile) is a probability plot: a graphical method for comparing two probability distributions by plotting their quantiles against each other. If the two distributions being compared are similar, the points in the Q-Q plot will approximately lie on the line y = x. If the distributions are linearly related, the points will approximately lie on a line, but not necessarily on the line y = x.

# In[266]:

import statsmodels.api as sm


# In[269]:

# Q-Q plot of column 1 against the default distribution, with a 45-degree line.
sm.qqplot(comb[1],line='45')


# In[275]:

os.getcwd()


# In[287]:

# Save one histogram per column 0..39 as histograms/histogram<i>.jpg.
for i in np.arange(0,40,1):
    # Tuple of path pieces, joined into the file name below.
    pieces1='histograms/histogram',format(i),'.jpg'
    hist=comb[i].hist()
    fig = hist.get_figure()
    fig.savefig(''.join(pieces1))
    # Clear the figure so the next histogram starts from an empty canvas.
    fig.clear()

Exemple #53

0

Afficher le fichier

Fichier : analysis.py Projet : zk12001/-

import numpy as np
from scipy.interpolate import interp1d
import sys

def load():
    """Read ``Analysis.xls`` without a header row and return the data frame."""
    frame = pd.read_excel("Analysis.xls", header=None)
    return frame
    
data = load()

# Summary statistics of the first 17 columns, ignoring missing values.
# The single-argument print call behaves the same on Python 2 and Python 3.
for i in range(17):
    print(data[i].dropna().describe())


# Histogram, Q-Q plot and box plot for columns 3..10.
for i in range(3,11):
    # Drop missing values once and use the same cleaned column for all
    # three plots. The original passed the raw column, NaNs included, to
    # qqplot only.
    column = data[i].dropna()
    plt.hist(column)
    plt.show()
    sm.qqplot(column, line='q')
    plt.show()
    plt.boxplot(column)
    plt.show()

# Columns 11..17: box plots split by the category stored in column 2.
for i in range(11,18):
    h = data[data[2].isin(['high'])][i]
    m = data[data[2].isin(['medium'])][i]
    l = data[data[2].isin(['low'])][i]
    d = [np.asarray(h),np.asarray(m),np.asarray(l)]
    plt.boxplot(d)
    plt.show()

def getmaxcorr(dt,index):
    max = -1.0
    pos = 0;

Exemple #54

0

Afficher le fichier

Fichier : sig_diff_than_zero.py Projet : csu-hmc/gait-control-direct-id-paper

sig_marks = {}

# One pass per (speed, event) group key.
for speed, event in groups.groups.keys():

    group = groups.get_group((speed, event))

    # Parallel lists: column name, t statistic and p-value per tested column.
    index = []
    t_vals = []
    p_vals = []

    for col in group.columns:

        # Only columns whose name starts with 'k_' are tested.
        if col.startswith('k_'):

            # plot the quantiles plot to see if the data is normally distributed
            fig = qqplot(group[col], line='45')
            # Figures go to <figures_dir>/quantile-plots/<event>/<structure>/<speed>/.
            plot_dir = os.path.join(PATHS['figures_dir'], 'quantile-plots',
                                    event, structure, '{:1.1f}'.format(speed))
            # presumably creates the directory and returns its path --
            # verify against utils.mkdir
            plot_dir = utils.mkdir(plot_dir)
            fig.savefig(os.path.join(plot_dir, '{}.png'.format(col)))
            plt.close(fig)

            # compute the t statistic to see if the value is significantly
            # different than zero
            t_stat, p_val = ttest_1samp(group[col], 0.0)

            index.append(col)
            t_vals.append(t_stat)
            p_vals.append(p_val)

    #mark = np.zeros((num_schedules, num_sensors, num_actuators), dtype=bool)

Exemple #55

0

Afficher le fichier

Fichier : plotkram.py Projet : Syssy/diplom

def plot_qq(datei, qq_Plot, fit_qq_Plot, vergleich = scipy.stats.invgauss):
    with open(datei, 'rb') as csvfile:
        myreader = csv.reader(csvfile, delimiter = ";",quoting=csv.QUOTE_NONE)
        liste = []
        # Erstelle Liste wie oben
        for row in myreader:
            unterliste = []
            for r in row:
                r2 = float(r)
                unterliste.append(r2)
            liste.append(unterliste)

    # Und einen qq-Plot erstellen, evtl Parameter zur vergleichsfunktion müssen
    # per Hand eingestellt werden
    if qq_Plot:
        print "erstelle qq-Plot",
        fig = plt.figure()
        ax = fig.add_subplot(221)
        sm.qqplot (np.array(liste[0]), vergleich, distargs= (0.005,),  line = 'r', ax =ax)
        #txt = ax.text(-1.8, 3500, str(params[0]) ,verticalalignment='top')
        #txt.set_bbox(dict(facecolor='k', alpha=0.1))
        print "nr2",
        ax = fig.add_subplot(222)
        sm.qqplot (np.array(liste[1]), vergleich, distargs= (0.005,),  line = 'r', ax =ax)
        #txt = ax.text(-1.8, 3500, str(params[1]) ,verticalalignment='top')
        #txt.set_bbox(dict(facecolor='k', alpha=0.1))
        print "nr3",
        ax = fig.add_subplot(223)
        sm.qqplot (np.array(liste[2]), vergleich, distargs= (0.005,),  line = 'r', ax =ax)
        #txt = ax.text(-1.8, 3500, str(params[2]) ,verticalalignment='top')
        #txt.set_bbox(dict(facecolor='k', alpha=0.1))
        print "nr4",
        ax = fig.add_subplot(224)
        sm.qqplot (np.array(liste[3]), vergleich, distargs= (0.005,),  line = 'r', ax =ax)
        #txt = ax.text(-1.8, 3500, str(params[3]) ,verticalalignment='top')
        #txt.set_bbox(dict(facecolor='k', alpha=0.1))
        print "qqplot erstellt"

    # qq-Plot mit automatischem fit zur Vergleichsfunktion
    if fit_qq_Plot:
        print "erstelle fit-qq-plot", 
        fig = plt.figure()
        ax = fig.add_subplot(221)
        sm.qqplot (np.array(liste[0]), vergleich, fit = True,  line = 'r', ax =ax)
        #txt = ax.text(-1.8, 3500, str(params[0]) ,verticalalignment='top')
        #txt.set_bbox(dict(facecolor='k', alpha=0.1))
        print "nr2",
        ax = fig.add_subplot(222)
        sm.qqplot (np.array(liste[1]), vergleich, fit = True,  line = 'r', ax =ax)
        #txt = ax.text(-1.8, 3500, str(params[1]) ,verticalalignment='top')
        #txt.set_bbox(dict(facecolor='k', alpha=0.1))
        print "nr3",
        ax = fig.add_subplot(223)
        sm.qqplot (np.array(liste[2]), vergleich, fit = True,  line = 'r', ax =ax)
        #txt = ax.text(-1.8, 3500, str(params[2]) ,verticalalignment='top')
        #txt.set_bbox(dict(facecolor='k', alpha=0.1))
        print "nr4",
        ax = fig.add_subplot(224)
        sm.qqplot (np.array(liste[3]), vergleich, fit = True,  line = 'r', ax =ax)
        #txt = ax.text(-1.8, 3500, str(params[3]) ,verticalalignment='top')
        #txt.set_bbox(dict(facecolor='k', alpha=0.1))
        print "qqplot erstellt"

    plt.show()

Exemple #56

0

Afficher le fichier

Fichier : Code III Week3.py Projet : aelai/Coursera-Data-Analysis-and-Interpretation-

# Adding employment rate to the model
print ("Association Between Urban Rate, Life Expectancy, Income, CO2 Emissions, Alcohol, Employment and Breast Cancers Rate")
reg6 = smf.ols('breastcancer ~ urbanrate_c + lifeexpect_c + co2emissions_c + income_c + alcconsumption_c + employrate_c', data=gapmind1).fit()
print (reg6.summary())
#%%
#%%
# Keep only the variables the author found significant (income and alcohol)
print ("Association Between Income, Alcohol and Breast Cancers Rate")
reg7 = smf.ols('breastcancer ~ income_c + alcconsumption_c', data=gapmind1).fit()
print (reg7.summary())
####################################################################################
# EVALUATING MODEL FIT
####################################################################################
#%%
# Q-Q plot of the reg7 residuals as a normality check
fig1=sm.qqplot(reg7.resid, line='r')
#%%
# simple plot of the Pearson residuals with a reference line at zero
stdres=pandas.DataFrame(reg7.resid_pearson)
# Note: fig2 holds the return value of plt.plot, not a Figure.
fig2 = plt.plot(stdres, 'o', ls='None')
l = plt.axhline(y=0, color='r')
plt.ylabel('Standardized Residual')
plt.xlabel('Observation Number')
print (fig2)
#%%
"""
# additional regression diagnostic plots
# For alcohol consumption 
fig3 = plt.figure(figsize=(12,8)) 
fig3 = sm.graphics.plot_regress_exog(reg7, 'alcconsumption_c', fig=fig3)
#%%

Exemple #57

0

Afficher le fichier

Fichier : Probabilistic Modelling Notebook.py Projet : vipmax/PM

from scipy import stats
import matplotlib.pyplot as plt

# Fit an F distribution to the sample and draw a probability plot against it.
params = stats.f.fit(sample)
print(params)
fig = plt.figure(8, figsize=(10, 10))
ax = fig.add_subplot(111)
res = stats.probplot(sample, dist=stats.f, sparams=params, plot=ax)
plt.show()


# In[13]:

import statsmodels.api as sm

# Q-Q plots of the sample against three candidate distributions, each with
# fit=True and a 45-degree reference line.
fig = sm.qqplot(sample, stats.genextreme, fit=True, line='45')
plt.show()

fig = sm.qqplot(sample, stats.lognorm, fit=True, line='45')
plt.show()

fig = sm.qqplot(sample, stats.f, fit=True, line='45')
plt.show()


# In[16]:

# Computes the Kolmogorov-Smirnov statistic on 2 samples.

#     This is a two-sided test for the null hypothesis that 2 independent samples
#     are drawn from the same continuous distribution.

Exemple #58

0

Afficher le fichier

Fichier : pregunta1.py Projet : juanfel/tarea2-analisis-inteligente-de-datos

# Runs the cross-validation iterations and returns the averaged validation MSE.
def itcrossval(kf, X, Y):
	"""Return the sum of per-fold validation MSEs divided by ``kf``.

	A linear regression without intercept is fitted on the training part of
	each fold and scored on the held-out part.

	kf -- number of folds passed to KFold, and the divisor of the result.
	X, Y -- predictors and targets, indexable by the fold index arrays.
	"""
	fold_errors = []
	for train, val in cross_validation.KFold(len(X), kf):
		model = lm.LinearRegression(fit_intercept = False)
		model.fit(X[train], Y[train])
		predicted = model.predict(X[val])
		fold_errors.append(mean_squared_error(Y[val], predicted))
	return sum(fold_errors) / kf

# Cross-validation with k=5
print "mse para training con k=5: ", itcrossval(5, Xm, ym)

# Cross-validation with k=10
print "mse para training con k=10: ", itcrossval(10, Xm, ym)


######## Question (j) ############################################################

# Compute the prediction error over all the training data
errorp = ytrain - yhat_train
print "Error de prediccion sobre training set: \n", errorp

# Draw a quantile-quantile plot.
# NOTE(review): this plots yhat_train - ytrain, the negative of errorp above.
graf = sm.qqplot(yhat_train - ytrain, fit=True, line='45') 
plt.show()

Exemple #59

0

Afficher le fichier

Fichier : test_gofplots.py Projet : ChadFulton/statsmodels

 def test_qqplot(self, close_figures):
     """Smoke test: qqplot of ``self.res`` with a regression line runs."""
     sm.qqplot(self.res, line='r')

Exemple #60

0

Afficher le fichier

Fichier : test_gofplots.py Projet : ChadFulton/statsmodels

 def test_qqplot_pltkwargs(self, close_figures):
     """Smoke test: qqplot accepts extra marker styling keyword arguments."""
     marker_style = dict(marker='d',
                         markerfacecolor='cornflowerblue',
                         markeredgecolor='white',
                         alpha=0.5)
     sm.qqplot(self.res, line='r', **marker_style)