Example #1
def draw_lag_plots(lmp_curves_list, hub_names, lag=24):
    """
    Plots the lag plot to evaluate the time-series data's autocorrelation.

    Parameters
    ----------
    lmp_curves_list : list of arrays
        A list of historical price curves.

    hub_names : list of strings
        Names of the CAISO hubs.

    lag : int
        The time offset between the two data points being evaluated for correlation.

    """

    fig, axs = plt.subplots(nrows=1,
                            ncols=len(hub_names),
                            sharey=True,
                            figsize=(20, 6))

    for i, curve, h in zip(range(len(hub_names)), lmp_curves_list, hub_names):
        lag_plot(curve, ax=axs[i], c='green', alpha=0.5, lag=lag)
        axs[i].set_title(f"{h} Lag Plot - {lag} Hours",
                         fontsize=16,
                         fontweight='bold')

    plt.tight_layout()
    plt.show()
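A hypothetical call for reference; the hub names and synthetic hourly prices below are illustrative only and not taken from the original project:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
hours = np.arange(24 * 90)
daily_shape = 10 * np.sin(2 * np.pi * hours / 24)
sp15 = pd.Series(35 + daily_shape + rng.normal(0, 2, hours.size))
np15 = pd.Series(33 + daily_shape + rng.normal(0, 2, hours.size))

# One lag plot per hub, comparing each price to the price 24 hours earlier
draw_lag_plots([sp15, np15], ["SP15", "NP15"], lag=24)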
Example #2
def plot_lags_and_auto(df, size=250):
    columns = len(df.columns)
    for col in df.columns:
        fig, ax = plt.subplots(1, 2, figsize=(20, 5))
        fig.suptitle(col, fontsize=16)
        lag_plot(df[col].tail(size), ax=ax[0])
        autocorrelation_plot(df[col].tail(size), ax=ax[1])
Example #3

    def lag_plot(self, column=None, lag_list=None):
        """
        :param column: name of the column to plot; defaults to self.data_column
        :param lag_list: list of 9 lag values; if omitted, 9 lags are spread
            evenly between 1 and the length of the series
        :return: None
        """
        # Select the series to plot
        if column:
            series = self.data[column]
        else:
            series = self.data[self.data_column]
        # Default to 9 evenly spaced lags
        if not lag_list:
            lag_list = [int(i) for i in np.linspace(1, len(series), 9)]
        # Draw the nine lag plots on a 3x3 grid, then show the figure once
        f = plt.figure(figsize=(15, 15))
        for index, lag in enumerate(lag_list):
            f.add_subplot(3, 3, index + 1)
            plt.title("lag {}".format(lag))
            lag_plot(series, lag=lag)
        plt.show()
Example #4

def arma_fit(option, dataframes, TICK, dayly_returns, pplotacf, ppltopacf,
             destranform_returns, freq_des, lag_acf, lag_pacf, def_seasonal, p,
             q, arma_sintrans):
    # plot the seasonal decomposition

    # first, check whether the data are stationary (ADF test), without transforming
    try:
        print(f'p value {TICK} Price: ', adfuller(abs(dataframes[option]))[1])
        test = adfuller(dataframes[option])[1]
    except Exception:
        print(
            "Given the nature of the data it is not possible to predict this series with ARMA"
        )
        return

    # plot the lag plot
    lag_plot(dataframes[option])

    if test > .05:

        #Grid search
        modelo_altranformarlo(dayly_returns, pplotacf, dataframes, option,
                              plot_lags, lag_acf, lag_pacf, p, q)

    else:

        arma_sintrans(dataframes, lag_acf, lag_pacf, ARMA, option, dataframes,
                      p, q)
Example #5
def lag_plott(df, features, crypto_name, output_path):
    for feature in features:
        df = df.dropna(subset=[feature])
        plt.figure(figsize=(5, 5))
        plt.title("lag_plot_" + feature + "_" + crypto_name)
        lag_plot(df[feature])
        plt.savefig(output_path + crypto_name + "_" + feature + ".png",
                    dpi=120)
        plt.close()
Example #6
def autocorrelation(df, ticker):
	plt.rcParams.update({'ytick.left' : False, 'axes.titlepad':10})

	# Plot
	fig, axes = plt.subplots(1, 7, figsize=(12,2), sharex=True, sharey=True, dpi=100)
	for i, ax in enumerate(axes.flatten()[:7]):
		lag_plot(df.Mean, lag=i+1, ax=ax, c='green')
		ax.set_title('Lag ' + str(i+1))

	fig.suptitle(f'The {ticker} stock', y=1.15)    
	plt.show()
Example #7
def plot_Model_Identify(DataSet, frequency=1, acf_lag=12, pacf_lag=12):
    """
    DataSet : dataframe with the type of first column either int()
    or panda datetime
    Frequency : int, Seasonal Component period (in time step)
    """
    # Organize plot
    fig, ax = plt.subplots(3, 4)

    # Plot the Observed Data
    DataSet.plot(ax=ax[0, 0])
    ax[0, 0].set_title('Observed Value')
    ax[0, 0].set_xlabel("")

    # Plot the autocorrelation plot
    autocorrelation_plot(DataSet.iloc[:, 0], ax=ax[1, 0])
    ax[1, 0].set_title('Autocorrelation')

    # Plot the QQ plot
    qqplot(
        DataSet.iloc[:, 0],
        ax=ax[2, 0],
    )
    ax[2, 0].set_title('Q-Q Plot')

    # Lag plot
    lag_plot(DataSet.iloc[:, 0], ax=ax[0, 1])
    ax[0, 1].set_title('Lag Plot')
    ax[0, 1].set_ylabel("")
    ax[0, 1].set_xlabel("")

    # ACF Plot
    tsa.plot_acf(DataSet.iloc[:, 0], ax=ax[1, 1], lags=acf_lag, alpha=0.05)
    ax[1, 1].set_title('ACF')

    # PACF Plot
    tsa.plot_pacf(DataSet.iloc[:, 0], ax=ax[2, 1], lags=pacf_lag, alpha=0.05)
    ax[2, 1].set_title('PACF')

    # decomposition plot (statsmodels >= 0.11 uses `period` rather than the old `freq`)
    decomposition = sm.tsa.seasonal_decompose(DataSet.iloc[:, 0],
                                              period=frequency)
    decomposition.resid.plot(ax=ax[0, 2])
    decomposition.resid.plot(ax=ax[0, 3], kind='kde')
    decomposition.seasonal.plot(ax=ax[1, 2])
    decomposition.trend.plot(ax=ax[2, 2])
    ax[0, 2].set_title('Residual')
    ax[1, 2].set_title('Seasonal')
    ax[2, 2].set_title('Trend')
    ax[0, 3].set_title('Residual Prob. Distrib')

    plt.show()
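A hypothetical usage sketch; the monthly series below is synthetic and purely illustrative:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
idx = pd.date_range("2015-01-01", periods=120, freq="MS")
trend = 0.5 * np.arange(120)
seasonal = 10 * np.sin(2 * np.pi * np.arange(120) / 12)
monthly = pd.DataFrame(
    {"value": 100 + trend + seasonal + rng.normal(0, 2, 120)}, index=idx)

# 12-step seasonality; 24 lags for the ACF/PACF panels
plot_Model_Identify(monthly, frequency=12, acf_lag=24, pacf_lag=24)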
Example #8
def noise_check(my_data):

    Y1 = int(combo_Y1.get())
    Y2 = int(combo_Y2.get())
    M1 = int(combo_M1.get())
    M2 = int(combo_M2.get())
    D1 = int(combo_D1.get())
    D2 = int(combo_D2.get())
    h1 = int(combo_h1.get())
    h2 = int(combo_h2.get())
    m1 = int(combo_m1.get())
    m2 = int(combo_m2.get())
    s1 = int(combo_s1.get())
    s2 = int(combo_s2.get())

    start = pd.Timestamp(Y1, M1, D1, h1, m1, s1)
    stop = pd.Timestamp(Y2, M2, D2, h2, m2, s2)

    the_data = my_data[start:stop]

    plt.figure(figsize=(10, 10))
    plt.suptitle('LAG PLOTS')

    ax1 = plt.subplot(211)
    lag_plot(the_data['back_be'], c='r')

    ax2 = plt.subplot(212)
    lag_plot(the_data['pump_pr'])

    plt.figure(figsize=(8, 8))
    autocorrelation_plot(the_data.pump_pr)

    x = the_data['pump_pr'].values
    f_s = 1
    X = fftpack.fft(x)
    freqs = fftpack.fftfreq(len(x)) * f_s

    fig, ax = plt.subplots()

    ax.plot(freqs, np.abs(X))
    ax.set_xlabel('Frequency in Hertz [Hz]')
    ax.set_ylabel('Frequency Domain (Spectrum) Magnitude')
    ax.set_xlim(0, f_s / 2)
    ##    ax.set_ylim(-5, 110)

    ##    fig, axes=plt.subplots(nrows=1, ncols=2,figsize=(8,8))
    ##    axes[0]=lag_plot(the_data['pump_pr'])
    ##    axes[0].set_title('Lag Plot')
    ##    axes[0]=autocorrelation_plot(the_data.pump_pr)

    plt.show()
Example #9
def setup_arima(train_data, price_col, time_col, **kwargs):

    plt.figure()
    lag_plot(train_data[price_col])
    plt.title('Amazon Stock (Dev Data) - Autocorrelation Plot')
    plt.show()

    fig, ax = plt.subplots()
    ax.plot(train_data[time_col], train_data[price_col])
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    ax.set_title('Amazon Stock (Dev Data) - Minute-by-Minute Closing Prices')
    fig.autofmt_xdate()
    plt.show()
Example #10
File: electric.py  Project: nilmtk/nilmtk
    def plot_lag(self, lag=1, ax=None):
        """
        Plots a lag plot of power data
        http://www.itl.nist.gov/div898/handbook/eda/section3/lagplot.htm

        Returns
        -------
        matplotlib.axis
        """
        if ax is None:
            ax = plt.gca()
        for power in self.power_series():
            lag_plot(power, lag, ax=ax)
        return ax
Example #11
    def plot_lag(self, lag=1, ax=None):
        """
        Plots a lag plot of power data
        http://www.itl.nist.gov/div898/handbook/eda/section3/lagplot.htm

        Returns
        -------
        matplotlib.axis
        """
        if ax is None:
            ax = plt.gca()
        for power in self.power_series():
            lag_plot(power, lag, ax=ax)
        return ax
Example #12

def lag_plots(data_df):
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    lag_plot(data_df[data_df.columns[0]], ax=ax1)
    ax1.set_title(data_df.columns[0])

    lag_plot(data_df[data_df.columns[1]], ax=ax2)
    ax2.set_title(data_df.columns[1])

    ax1.set_ylabel('$y_{t+1}$')
    ax1.set_xlabel('$y_t$')
    ax2.set_ylabel('$y_{t+1}$')
    ax2.set_xlabel('$y_t$')

    plt.tight_layout()
Example #13
    def scatter_lag_plots(self, data, no_of_lags, col, district):
        fig, axes = plt.subplots(2,
                                 4,
                                 figsize=(15, 8),
                                 sharex=True,
                                 sharey=True,
                                 dpi=100)
        for i, ax in enumerate(axes.flatten()[:no_of_lags]):
            lag_plot(data[col], lag=i + 1, ax=ax, c='red')
            ax.set_title('Lag ' + str(i + 1))

        fig.suptitle('Lag Analysis for Sales with {} lags at {}'.format(
            no_of_lags, district),
                     weight='bold')
        plt.show()

        return fig
Example #14

def show_1d(s, log_scale=False):

    fig, ax = plt.subplots(1, 3, figsize=FIG_SIZE)

    tmp_s = s
    data_name = ''
    sns.swarmplot(y=tmp_s, ax=ax[0])
    lag_plot(tmp_s, ax=ax[1], alpha=0.5)
    autocorrelation_plot(tmp_s, ax[2], alpha=0.5)
    if log_scale:

        ax[0].set(yscale='symlog')
        ax[1].set(yscale='symlog', xscale='symlog')

    ax[0].set_title(f"{data_name}总体图")
    ax[1].set_title(f"{data_name}时滞图")
    ax[2].set_title(f"{data_name}自相关图")
Example #15
def plot_lag_plots(df, column):
    """
    Lag plots:

    If points get wide and scattered with increasing lag,
    this means lesser correlation
    """

    fig, axes = plt.subplots(1,
                             4,
                             figsize=(10, 3),
                             sharex=True,
                             sharey=True,
                             dpi=100)
    for i, ax in enumerate(axes.flatten()[:4]):
        lag_plot(df[column], lag=i + 1, ax=ax)
        ax.set_title("Lag " + str(i + 1))
    plt.show()
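A hypothetical usage sketch illustrating the point in the docstring; the AR(1) series and the "demand" column name are made up for this example:

import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
noise = rng.normal(0, 1, 500)
ar1 = np.zeros(500)
for t in range(1, 500):
    # AR(1) process: the lag-k correlation decays as 0.8**k, so the
    # four lag plots become progressively more scattered
    ar1[t] = 0.8 * ar1[t - 1] + noise[t]

plot_lag_plots(pd.DataFrame({"demand": ar1}), "demand")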
Example #16
def autocorrelation(file):
    '''Checking for autocorrelation within mood variable'''

    df = pd.read_csv(file)

    df = df[["id", "time", "mood"]]

    for id in df["id"].unique():

        series = df[df["id"] == id].mood

        autocorrelation_plot(series)
        plt.title("Autocorrelation plot for user " + id)
        plt.show()

        lag_plot(series)
        plt.xlabel("Mood at current timepoint")
        plt.ylabel("Mood at next timepoint")
        plt.title("Lag plot for user " + id)
        plt.show()
Example #17
def plot_autocorr(args: argparse.Namespace, column: str, df: pd.DataFrame):
    fig = plt.figure(figsize=(8, 9), constrained_layout=True)
    gs = fig.add_gridspec(3, 2)

    ax1 = fig.add_subplot(gs[0, :])
    autocorrelation_plot(df[column], c=colors[0], ax=ax1)
    ax1.spines["left"].set_color("gray")
    ax1.spines["bottom"].set_color("gray")
    ax1.spines["right"].set_visible(False)
    ax1.spines["top"].set_visible(False)

    ax2 = fig.add_subplot(gs[1, 0])
    lag_plot(df[column], lag=100, c=colors[1], ax=ax2, ec="k")
    ax2.spines["left"].set_color("gray")
    ax2.spines["bottom"].set_color("gray")
    ax2.spines["right"].set_visible(False)
    ax2.spines["top"].set_visible(False)

    ax3 = fig.add_subplot(gs[1, 1])
    lag_plot(df[column], lag=200, c=colors[2], ax=ax3, ec="k")
    ax3.spines["left"].set_color("gray")
    ax3.spines["bottom"].set_color("gray")
    ax3.spines["right"].set_visible(False)
    ax3.spines["top"].set_visible(False)

    ax4 = fig.add_subplot(gs[2, 0])
    lag_plot(df[column], lag=400, c=colors[3], ax=ax4, ec="k")
    ax4.spines["left"].set_color("gray")
    ax4.spines["bottom"].set_color("gray")
    ax4.spines["right"].set_visible(False)
    ax4.spines["top"].set_visible(False)

    ax5 = fig.add_subplot(gs[2, 1])
    lag_plot(df[column], lag=500, c=colors[4], ax=ax5, ec="k")
    ax5.spines["left"].set_color("gray")
    ax5.spines["bottom"].set_color("gray")
    ax5.spines["right"].set_visible(False)
    ax5.spines["top"].set_visible(False)

    plt.suptitle(f"ELM ID:46, BES {column}", fontsize=18)
    plt.tight_layout()  # rect=[0, 0.03, 1, 0.95], pad=1.5, h_pad=1.5)
    if not args.dry_run:
        plt.savefig(
            os.path.join(
                args.output_dir,
                f"auto_correlation_plots_elm_id_46_{column}.png",
            ),
            dpi=150,
        )
    plt.show()
Example #18
def plot(file):
    '''
    Visualization of the temperature dataset (CSV file).

    Parameters
    ----------
    file : str
        Path to the CSV data to load.

    Returns
    -------
    None; the plots are shown with plt.show().
    '''
    df = pd.read_csv(file, index_col=0, parse_dates=True)
    plt.bar(x='Nan', height=df.isna().sum())
    plt.show()
    
    dates = df.index #assign x
    temp = df['meanT']#assign y

    plt.plot(dates,temp)
    plt.xlabel('dates')
    plt.ylabel('Mean Temp')
    plt.title('Temperature profile in Berlin')
    plt.show()
    
    year_2021 = df.index[-700:]
    temp_2021 = df['meanT'][-700:]
    plt.plot(year_2021,temp_2021)
    plt.xlabel('dates')
    plt.ylabel('Mean Temperature')
    plt.title('Temperature profile in Berlin 2021')
    plt.show()
    
    lag_plot(df)
    plt.show()
    
    autocorrelation_plot(df)
    plt.show()

    plot_acf(df, lags=30)
    plt.show()
Example #19
def showLagPlot(df, numero_hogar, fichero, titulo):

    # Generate the plot and set up the axes
    seriesRange00_06 = df['Rango 00-06']
    seriesRange06_12 = df['Rango 06-12']
    seriesRange12_18 = df['Rango 12-18']
    seriesRange18_00 = df['Rango 18-00']

    lag_plot(seriesRange00_06, c="blue")
    lag_plot(seriesRange06_12, c="orange")
    lag_plot(seriesRange12_18, c="green")
    lag_plot(seriesRange18_00, c="red")

    pyplot.title(titulo + numero_hogar, fontsize=16)
    pyplot.xlabel('y (t)', fontsize=14)

    pyplot.ylabel('y (t + 1)', fontsize=14)
    fichero = os.path.join(RUTA_ACTUAL, DIRECTORIO_GUARDADO_IMG,
                           fichero + '_dispersion.png')
    pyplot.savefig(fichero)
    pyplot.show()
Example #20
        fig = plotFigures(
            df_tickername[[
                "close", "MA for 10 days", "MA for 20 days", "MA for 50 days"
            ]],
            f"{ticker_name} Moving Average",
            "Time (in days)",
            "Closing Price",
            f"{ticker_name}_Moving_Average",
        )
        pdf.savefig(fig)
        plt.show()
        plt.close()

        # Auto-Correlation
        plt.figure(figsize=(10, 10))
        lag_plot(df_tickername["open"], lag=5)
        plt.title(f"{ticker_name} Autocorrelation Plot")
        #    plt.savefig(f_getFilePath(f'reports\\figures\\{ticker_name}_Autocorrelation_Plot.png'))
        pdf.savefig()
        plt.show()
        plt.close()

        # Volatility
        fig = plotFigures(
            df_tickername["Volatility"],
            f"{ticker_name} Volatility",
            "Time (in days)",
            "Historical Volatility",
            f"{ticker_name}_Volatility",
        )
        pdf.savefig(fig)
Example #21
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
from pandas import datetime
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
import os

root_dir = "/home/charan/Documents/workspaces/python_workspaces/Data/BDA_Project"
data_path = "stocks_data/final_stock_consolidated.csv"
data_path = os.path.join(root_dir, data_path)

df = pd.read_csv(data_path)

plt.figure()
lag_plot(df['Open'], lag=3)
plt.title('IBM Stock - Autocorrelation plot with lag = 3')
plt.show()

train_data, test_data = df[0:int(len(df) * 0.7)], df[int(len(df) * 0.7):]
training_data = train_data['Close'].values
test_data = test_data['Close'].values
history = [x for x in training_data]
model_predictions = []
N_test_observations = len(test_data)
for time_point in range(N_test_observations):
    model = ARIMA(history, order=(4, 1, 0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    model_predictions.append(yhat)
Example #22
import numpy as np
import pandas as pd

from pandas.plotting import andrews_curves
from pydataset import data
iris = data('iris')
iris.head()
andrews_curves(iris, 'Species')

#Parallel Coordinates
from pandas.plotting import parallel_coordinates
from pydataset import data
iris = data('iris')
parallel_coordinates(iris, 'Species')

#Lag Plot
from pandas.plotting import lag_plot
spacing = np.linspace(-99 * np.pi, 99 * np.pi, num=1000)
spacing
data = pd.Series(0.1 * np.random.rand(1000) + 0.9 * np.sin(spacing))
data
lag_plot(data)

#Autocorrelation Plot
from pandas.plotting import autocorrelation_plot
spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
data = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
autocorrelation_plot(data)

#Bootstrap Plot
from pandas.plotting import bootstrap_plot
data = pd.Series(np.random.rand(1000))
bootstrap_plot(data, size=50, samples=500, color='grey')
Example #23
stocks.head()
shelter_outcomes = pd.read_csv(
    "../input/austin-animal-center-shelter-outcomes-and/aac_shelter_outcomes.csv",
    parse_dates=['date_of_birth', 'datetime'])
shelter_outcomes = shelter_outcomes[[
    'outcome_type', 'age_upon_outcome', 'datetime', 'animal_type', 'breed',
    'color', 'sex_upon_outcome', 'date_of_birth'
]]
shelter_outcomes.head()
shelter_outcomes['date_of_birth'].value_counts().sort_values().plot.line()
shelter_outcomes['date_of_birth'].value_counts().resample(
    'Y').sum().plot.line()
stocks['volume'].resample('Y').mean().plot.bar()
from pandas.plotting import lag_plot

lag_plot(stocks['volume'].tail(250))
from pandas.plotting import autocorrelation_plot

autocorrelation_plot(stocks['volume'])
import pandas as pd

crypto = pd.read_csv("../input/all-crypto-currencies/crypto-markets.csv")
crypto = crypto[crypto['name'] == 'Bitcoin']
crypto['date'] = pd.to_datetime(crypto['date'])
crypto.head()
from IPython.display import HTML

HTML("""
<ol>
<li>Time-series data is really a special case of interval data.</li>
<br/>
Example #24
File: matplotlib.py  Project: kjkjv/python
ax.plot_surface(x, y, z)
ax.set_xlabel('CRIM')
ax.set_ylabel('MEDV')
ax.set_zlabel('ZN')
ax.set_title("재광's")
plt.show()

# 1.5 Read the boston_train.csv file and draw a lag plot
df = pd.read_csv(
    'C:/Users/CPB06GameN/PycharmProjects/GitHub/bigdata/bigdata/파이썬빅데이터분석/boston_train.csv'
)

# 1) Solution
from pandas.plotting import lag_plot
lag_plot(np.log(df['MEDV']))
lag_plot(np.log(df['CRIM']))
plt.show()

# 2) Solution
lag_plot(np.log(df['MEDV']))
plt.show()

# 1.6 Read the boston_train.csv file and draw an autocorrelation plot
df = pd.read_csv(
    'C:/Users/CPB06GameN/PycharmProjects/GitHub/bigdata/bigdata/파이썬빅데이터분석/boston_train.csv'
)
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(np.log(df['MEDV']))
plt.show()
Example #25
years.boxplot(column=["2001", "2002", "2003"], ax=ax4)
ax4.set(xlabel="Years", ylabel="Sales", title="Box and whisker plot")

# Heatmap plot
img5 = ax5.matshow(years, interpolation=None, aspect='auto')
xaxis = [2000, 2001, 2002, 2003]
yaxis = range(-1, 13, 2)
ax5.set(xlabel="Year",
        ylabel="Month",
        xticklabels=xaxis,
        yticklabels=yaxis,
        title="Heatmap plot")
ax5.xaxis.tick_bottom()
fig.colorbar(img5, ax=ax5, aspect=5)

# Lag plot
lag_plot(series_shampoo, ax=ax6)
diagonal = range(int(series_shampoo.min()), int(series_shampoo.max()))
ax6.plot(diagonal, diagonal, '--k')
ax6.set(xlabel="Sales(t)", ylabel="Sales(t+1)", title="Lag plot")

# Autocorrelation plot
autocorrelation_plot(series_shampoo, ax=ax7)
ax7.set(title="Autocorrelation plot", ylim=(-1, 1))

ax8.remove()

fig.subplots_adjust(hspace=0.6)
# plt.tight_layout()
plt.show()
Example #26
def plot_lagplot(df):
    X = df['confirmed']
    lag_plot(X)
    plt.show()
Example #27
File: plot.py  Project: timcera/tstoolbox
def plot(input_ts='-',
         columns=None,
         start_date=None,
         end_date=None,
         clean=False,
         skiprows=None,
         index_type='datetime',
         names=None,
         ofilename='plot.png',
         type='time',
         xtitle='',
         ytitle='',
         title='',
         figsize='10,6.0',
         legend=None,
         legend_names=None,
         subplots=False,
         sharex=True,
         sharey=False,
         colors='auto',
         linestyles='auto',
         markerstyles=' ',
         style='auto',
         logx=False,
         logy=False,
         xaxis='arithmetic',
         yaxis='arithmetic',
         xlim=None,
         ylim=None,
         secondary_y=False,
         mark_right=True,
         scatter_matrix_diagonal='kde',
         bootstrap_size=50,
         bootstrap_samples=500,
         norm_xaxis=False,
         norm_yaxis=False,
         lognorm_xaxis=False,
         lognorm_yaxis=False,
         xy_match_line='',
         grid=False,
         label_rotation=None,
         label_skip=1,
         force_freq=None,
         drawstyle='default',
         por=False,
         invert_xaxis=False,
         invert_yaxis=False,
         round_index=None,
         plotting_position='weibull',
         source_units=None,
         target_units=None,
         lag_plot_lag=1):
    r"""Plot data."""
    # Need to work around some old option defaults with the implementation of
    # mando
    legend = bool(legend == '' or legend == 'True' or legend is None)

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FixedLocator

    tsd = tsutils.common_kwds(tsutils.read_iso_ts(input_ts,
                                                  skiprows=skiprows,
                                                  names=names,
                                                  index_type=index_type),
                              start_date=start_date,
                              end_date=end_date,
                              pick=columns,
                              round_index=round_index,
                              dropna='all',
                              source_units=source_units,
                              target_units=target_units,
                              clean=clean)

    if type in ['bootstrap',
                'heatmap',
                'autocorrelation',
                'lag_plot']:
        if len(tsd.columns) != 1:
            raise ValueError("""
*
*   The '{1}' plot can only work with 1 time-series in the DataFrame.
*   The DataFrame that you supplied has {0} time-series.
*
""".format(len(tsd.columns), type))

    if por is True:
        tsd = tsutils.common_kwds(tsutils.read_iso_ts(tsd),
                                  start_date=start_date,
                                  end_date=end_date,
                                  round_index=round_index,
                                  dropna='no')

    # This is to help pretty print the frequency
    try:
        try:
            pltfreq = str(tsd.index.freq, 'utf-8').lower()
        except TypeError:
            pltfreq = str(tsd.index.freq).lower()
        if pltfreq.split(' ')[0][1:] == '1':
            beginstr = 3
        else:
            beginstr = 1
        if pltfreq == 'none':
            short_freq = ''
        else:
            # short freq string (day) OR (2 day)
            short_freq = '({0})'.format(pltfreq[beginstr:-1])
    except AttributeError:
        short_freq = ''

    if legend_names:
        lnames = tsutils.make_list(legend_names)
        if len(lnames) != len(set(lnames)):
            raise ValueError("""
*
*   Each name in legend_names must be unique.
*
""")
        if len(tsd.columns) == len(lnames):
            renamedict = dict(list(zip(tsd.columns, lnames)))
        elif type == 'xy' and len(tsd.columns) // 2 == len(lnames):
            renamedict = dict(list(zip(tsd.columns[2::2], lnames[1:])))
            renamedict[tsd.columns[1]] = lnames[0]
        else:
            raise ValueError("""
*
*   For 'legend_names' you must have the same number of comma
*   separated names as columns in the input data.  The input
*   data has {0} where the number of 'legend_names' is {1}.
*
*   If 'xy' type you need to have legend names as x,y1,y2,y3,...
*
""".format(len(tsd.columns), len(lnames)))
        tsd.rename(columns=renamedict, inplace=True)
    else:
        lnames = tsd.columns

    if colors == 'auto':
        colors = color_list
    else:
        colors = tsutils.make_list(colors)

    if linestyles == 'auto':
        linestyles = line_list
    else:
        linestyles = tsutils.make_list(linestyles)

    if markerstyles == 'auto':
        markerstyles = marker_list
    else:
        markerstyles = tsutils.make_list(markerstyles)
        if markerstyles is None:
            markerstyles = ' '

    if style != 'auto':

        nstyle = tsutils.make_list(style)
        if len(nstyle) != len(tsd.columns):
            raise ValueError("""
*
*   You have to have the same number of style strings as time-series to plot.
*   You supplied '{0}' for style which has {1} style strings,
*   but you have {2} time-series.
*
""".format(style, len(nstyle), len(tsd.columns)))
        colors = []
        markerstyles = []
        linestyles = []
        for st in nstyle:
            colors.append(st[0])
            if len(st) == 1:
                markerstyles.append(' ')
                linestyles.append('-')
                continue
            if st[1] in marker_list:
                markerstyles.append(st[1])
                try:
                    linestyles.append(st[2:])
                except IndexError:
                    linestyles.append(' ')
            else:
                markerstyles.append(' ')
                linestyles.append(st[1:])
    if linestyles is None:
        linestyles = [' ']
    else:
        linestyles = [' ' if i == '  ' else i for i in linestyles]
    markerstyles = [' ' if i is None else i for i in markerstyles]

    icolors = itertools.cycle(colors)
    imarkerstyles = itertools.cycle(markerstyles)
    ilinestyles = itertools.cycle(linestyles)

    style = ['{0}{1}{2}'.format(next(icolors),
                                next(imarkerstyles),
                                next(ilinestyles))
             for i in list(range(len(tsd.columns)))]

    # reset to beginning of iterator
    icolors = itertools.cycle(colors)
    imarkerstyles = itertools.cycle(markerstyles)
    ilinestyles = itertools.cycle(linestyles)

    if (logx is True or
            logy is True or
            norm_xaxis is True or
            norm_yaxis is True or
            lognorm_xaxis is True or
            lognorm_yaxis is True):
        warnings.warn("""
*
*   The --logx, --logy, --norm_xaxis, --norm_yaxis, --lognorm_xaxis, and
*   --lognorm_yaxis options are deprecated.
*
*   For --logx use --xaxis="log"
*   For --logy use --yaxis="log"
*   For --norm_xaxis use --type="norm_xaxis"
*   For --norm_yaxis use --type="norm_yaxis"
*   For --lognorm_xaxis use --type="lognorm_xaxis"
*   For --lognorm_yaxis use --type="lognorm_yaxis"
*
""")

    if xaxis == 'log':
        logx = True
    if yaxis == 'log':
        logy = True

    if type in ['norm_xaxis',
                'lognorm_xaxis',
                'weibull_xaxis']:
        xaxis = 'normal'
        if logx is True:
            logx = False
            warnings.warn("""
*
*   The --type={1} cannot also have the xaxis set to {0}.
*   The {0} setting for xaxis is ignored.
*
""".format(xaxis, type))

    if type in ['norm_yaxis',
                'lognorm_yaxis',
                'weibull_yaxis']:
        yaxis = 'normal'
        if logy is True:
            logy = False
            warnings.warn("""
*
*   The --type={1} cannot also have the yaxis set to {0}.
*   The {0} setting for yaxis is ignored.
*
""".format(yaxis, type))

    xlim = _know_your_limits(xlim, axis=xaxis)
    ylim = _know_your_limits(ylim, axis=yaxis)

    figsize = tsutils.make_list(figsize)

    if not isinstance(tsd.index, pd.DatetimeIndex):
        tsd.insert(0, tsd.index.name, tsd.index)

    if type in ['xy',
                'double_mass']:
        if tsd.shape[1] % 2 != 0:
            raise AttributeError("""
*
*   The 'xy' and 'double_mass' types must have an even number of columns
*   arranged as x,y pairs.  You supplied {0} columns.
*
""".format(tsd.shape[1]))
        colcnt = tsd.shape[1] // 2
    elif type in ['norm_xaxis',
                  'norm_yaxis',
                  'lognorm_xaxis',
                  'lognorm_yaxis',
                  'weibull_xaxis',
                  'weibull_yaxis']:
        colcnt = tsd.shape[1]

    if type in ['xy',
                'double_mass',
                'norm_xaxis',
                'norm_yaxis',
                'lognorm_xaxis',
                'lognorm_yaxis',
                'weibull_xaxis',
                'weibull_yaxis',
                'heatmap']:
        _, ax = plt.subplots(figsize=figsize)
        plotdict = {(False, True): ax.semilogy,
                    (True, False): ax.semilogx,
                    (True, True): ax.loglog,
                    (False, False): ax.plot}

    if type == 'time':
        ax = tsd.plot(legend=legend, subplots=subplots, sharex=sharex,
                      sharey=sharey, style=None, logx=logx, logy=logy,
                      xlim=xlim, ylim=ylim, secondary_y=secondary_y,
                      mark_right=mark_right, figsize=figsize,
                      drawstyle=drawstyle)
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        xtitle = xtitle or 'Time'
        if legend is True:
            plt.legend(loc='best')
    elif type in ['taylor']:
        from .. skill_metrics import centered_rms_dev
        from .. skill_metrics import taylor_diagram
        ref = tsd.iloc[:, 0]
        std = [pd.np.std(ref)]
        ccoef = [1.0]
        crmsd = [0.0]
        for col in range(1, len(tsd.columns)):
            std.append(pd.np.std(tsd.iloc[:, col]))
            ccoef.append(pd.np.corrcoef(tsd.iloc[:, col],
                                        ref)[0][1])
            crmsd.append(centered_rms_dev(tsd.iloc[:, col].values,
                                          ref.values))
        taylor_diagram(pd.np.array(std),
                       pd.np.array(crmsd),
                       pd.np.array(ccoef))
    elif type in ['target']:
        from .. skill_metrics import centered_rms_dev
        from .. skill_metrics import rmsd
        from .. skill_metrics import bias
        from .. skill_metrics import target_diagram
        biases = []
        rmsds = []
        crmsds = []
        ref = tsd.iloc[:, 0].values
        for col in range(1, len(tsd.columns)):
            biases.append(bias(tsd.iloc[:, col].values, ref))
            crmsds.append(centered_rms_dev(tsd.iloc[:, col].values,
                                           ref))
            rmsds.append(rmsd(tsd.iloc[:, col].values,
                              ref))
        target_diagram(pd.np.array(biases),
                       pd.np.array(crmsds),
                       pd.np.array(rmsds))
    elif type in ['xy',
                  'double_mass']:
        # PANDAS was not doing the right thing with xy plots
        # if you wanted lines between markers.
        # Fell back to using raw matplotlib.
        # Boy I do not like matplotlib.

        for colindex in range(colcnt):
            ndf = tsd.iloc[:, colindex*2:colindex*2 + 2]
            if type == 'double_mass':
                ndf = ndf.dropna().cumsum()
            oxdata = pd.np.array(ndf.iloc[:, 0])
            oydata = pd.np.array(ndf.iloc[:, 1])

            plotdict[(logx, logy)](oxdata,
                                   oydata,
                                   linestyle=next(ilinestyles),
                                   color=next(icolors),
                                   marker=next(imarkerstyles),
                                   label=lnames[colindex],
                                   drawstyle=drawstyle)

        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        if legend is True:
            ax.legend(loc='best')

        if type == 'double_mass':
            xtitle = xtitle or 'Cumulative {0}'.format(tsd.columns[0])
            ytitle = ytitle or 'Cumulative {0}'.format(tsd.columns[1])

    elif type in ['norm_xaxis',
                  'norm_yaxis',
                  'lognorm_xaxis',
                  'lognorm_yaxis',
                  'weibull_xaxis',
                  'weibull_yaxis']:
        ppf = tsutils.set_ppf(type.split('_')[0])
        ys = tsd.iloc[:, :]

        for colindex in range(colcnt):
            oydata = pd.np.array(ys.iloc[:, colindex].dropna())
            oydata = pd.np.sort(oydata)[::-1]
            n = len(oydata)
            norm_axis = ax.xaxis
            oxdata = ppf(tsutils.set_plotting_position(n,
                                                       plotting_position))

            if type in ['norm_yaxis',
                        'lognorm_yaxis',
                        'weibull_yaxis']:
                oxdata, oydata = oydata, oxdata
                norm_axis = ax.yaxis

            plotdict[(logx, logy)](oxdata,
                                   oydata,
                                   linestyle=next(ilinestyles),
                                   color=next(icolors),
                                   marker=next(imarkerstyles),
                                   label=lnames[colindex],
                                   drawstyle=drawstyle)

        # Make it pretty
        xtmaj = pd.np.array([0.01, 0.1, 0.5, 0.9, 0.99])
        xtmaj_str = ['1', '10', '50', '90', '99']
        xtmin = pd.np.concatenate([pd.np.linspace(0.001, 0.01, 10),
                                   pd.np.linspace(0.01, 0.1, 10),
                                   pd.np.linspace(0.1, 0.9, 9),
                                   pd.np.linspace(0.9, 0.99, 10),
                                   pd.np.linspace(0.99, 0.999, 10)])
        xtmaj = ppf(xtmaj)
        xtmin = ppf(xtmin)

        norm_axis.set_major_locator(FixedLocator(xtmaj))
        norm_axis.set_minor_locator(FixedLocator(xtmin))

        if type in ['norm_xaxis',
                    'lognorm_xaxis',
                    'weibull_xaxis']:
            ax.set_xticklabels(xtmaj_str)
            ax.set_ylim(ylim)
            ax.set_xlim(ppf(xlim))

        elif type in ['norm_yaxis',
                      'lognorm_yaxis',
                      'weibull_yaxis']:
            ax.set_yticklabels(xtmaj_str)
            ax.set_xlim(xlim)
            ax.set_ylim(ppf(ylim))

        if type in ['norm_xaxis',
                    'norm_yaxis']:
            xtitle = xtitle or 'Normal Distribution'
            ytitle = ytitle or tsd.columns[0]
        elif type in ['lognorm_xaxis',
                      'lognorm_yaxis']:
            xtitle = xtitle or 'Log Normal Distribution'
            ytitle = ytitle or tsd.columns[0]
        elif type in ['weibull_xaxis',
                      'weibull_yaxis']:
            xtitle = xtitle or 'Weibull Distribution'
            ytitle = ytitle or tsd.columns[0]

        if type in ['norm_yaxis',
                    'lognorm_yaxis',
                    'weibull_yaxis']:
            xtitle, ytitle = ytitle, xtitle

        if legend is True:
            ax.legend(loc='best')

    elif type in ['kde',
                  'probability_density']:
        ax = tsd.plot(kind='kde', legend=legend, subplots=subplots,
                      sharex=sharex, sharey=sharey, style=None, logx=logx,
                      logy=logy, xlim=xlim, ylim=ylim, secondary_y=secondary_y,
                      figsize=figsize)
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        ytitle = ytitle or 'Density'
        if legend is True:
            plt.legend(loc='best')
    elif type == 'kde_time':
        from scipy.stats.kde import gaussian_kde
        _, (ax0, ax1) = plt.subplots(nrows=1,
                                     ncols=2,
                                     sharey=True,
                                     figsize=figsize,
                                     gridspec_kw={'width_ratios': [1, 4]})
        tsd.plot(legend=legend, subplots=subplots, sharex=sharex,
                 sharey=sharey, style=None, logx=logx, logy=logy, xlim=xlim,
                 ylim=ylim, secondary_y=secondary_y, mark_right=mark_right,
                 figsize=figsize, drawstyle=drawstyle, ax=ax1)
        for index, line in enumerate(ax1.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        xtitle = xtitle or 'Time'
        ylimits = ax1.get_ylim()
        ny = pd.np.linspace(ylimits[0], ylimits[1], 1000)
        for col in range(len(tsd.columns)):
            xvals = tsd.iloc[:, col].dropna().values
            pdf = gaussian_kde(xvals)
            ax0.plot(pdf(ny),
                     ny,
                     linestyle=style[col][2:],
                     color=style[col][0],
                     marker=style[col][1],
                     label=tsd.columns[col],
                     drawstyle=drawstyle)
        ax0.set(xlabel='Probability Density', ylabel=ytitle)
    elif type == 'boxplot':
        tsd.boxplot(figsize=figsize)
    elif type == 'scatter_matrix':
        from pandas.plotting import scatter_matrix
        if scatter_matrix_diagonal == 'probablity_density':
            scatter_matrix_diagonal = 'kde'
        scatter_matrix(tsd,
                       diagonal=scatter_matrix_diagonal,
                       figsize=figsize)
    elif type == 'lag_plot':
        from pandas.plotting import lag_plot
        lag_plot(tsd,
                 lag=lag_plot_lag)
        xtitle = xtitle or 'y(t)'
        ytitle = ytitle or 'y(t+{0})'.format(short_freq or 1)
    elif type == 'autocorrelation':
        from pandas.plotting import autocorrelation_plot
        autocorrelation_plot(tsd)
        xtitle = xtitle or 'Time Lag {0}'.format(short_freq)
    elif type == 'bootstrap':
        from pandas.plotting import bootstrap_plot
        bootstrap_plot(tsd,
                       size=bootstrap_size,
                       samples=bootstrap_samples,
                       color='gray')
    elif type == 'heatmap':
        # Find beginning and end years
        byear = tsd.index[0].year
        eyear = tsd.index[-1].year
        tsd = tsutils.asbestfreq(tsd)
        if tsd.index.freqstr != 'D':
            raise ValueError("""
*
*  The "heatmap" plot type can only work with daily time series.
*
""")
        dr = pd.date_range('{0}-01-01'.format(byear),
                           '{0}-12-31'.format(eyear),
                           freq='D')
        ntsd = tsd.reindex(index=dr)
        groups = ntsd.iloc[:, 0].groupby(pd.TimeGrouper('A'))
        years = pd.DataFrame()
        for name, group in groups:
            ngroup = group.values
            if len(group.values) == 365:
                ngroup = pd.np.append(group.values, [pd.np.nan])
            years[name.year] = ngroup
        years = years.T
        plt.imshow(years,
                   interpolation=None,
                   aspect='auto')
        plt.colorbar()
        yticks = list(range(byear, eyear + 1))
        skip = len(yticks)//20 + 1
        plt.yticks(range(0, len(yticks), skip), yticks[::skip])
        mnths = [0, 30, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
        mnths_labels = ['Jan',
                        'Feb',
                        'Mar',
                        'Apr',
                        'May',
                        'Jun',
                        'Jul',
                        'Aug',
                        'Sep',
                        'Oct',
                        'Nov',
                        'Dec']
        plt.xticks(mnths, mnths_labels)
        grid = False
    elif (type == 'bar' or
          type == 'bar_stacked' or
          type == 'barh' or
          type == 'barh_stacked'):
        stacked = False
        if type[-7:] == 'stacked':
            stacked = True
        kind = 'bar'
        if type[:4] == 'barh':
            kind = 'barh'
        ax = tsd.plot(kind=kind, legend=legend, stacked=stacked,
                      style=style, logx=logx, logy=logy, xlim=xlim,
                      ylim=ylim, figsize=figsize)
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        freq = tsutils.asbestfreq(tsd, force_freq=force_freq).index.freqstr
        if freq is not None:
            if 'A' in freq:
                endchar = 4
            elif 'M' in freq:
                endchar = 7
            elif 'D' in freq:
                endchar = 10
            elif 'H' in freq:
                endchar = 13
            else:
                endchar = None
            nticklabels = []
            if kind == 'bar':
                taxis = ax.xaxis
            else:
                taxis = ax.yaxis
            for index, i in enumerate(taxis.get_majorticklabels()):
                if index % label_skip:
                    nticklabels.append(' ')
                else:
                    nticklabels.append(i.get_text()[:endchar])
            taxis.set_ticklabels(nticklabels)
            plt.setp(taxis.get_majorticklabels(), rotation=label_rotation)
        if legend is True:
            plt.legend(loc='best')
    elif type == 'histogram':
        tsd.hist(figsize=figsize)
    else:
        raise ValueError("""
*
*   Plot 'type' {0} is not supported.
*
""".format(type))

    if xy_match_line:
        if isinstance(xy_match_line, str):
            xymsty = xy_match_line
        else:
            xymsty = 'g--'
        nxlim = ax.get_xlim()
        nylim = ax.get_ylim()
        maxt = max(nxlim[1], nylim[1])
        mint = min(nxlim[0], nylim[0])
        ax.plot([mint, maxt], [mint, maxt], xymsty, zorder=1)
        ax.set_ylim(nylim)
        ax.set_xlim(nxlim)

    plt.xlabel(xtitle)
    plt.ylabel(ytitle)

    if invert_xaxis is True:
        plt.gca().invert_xaxis()
    if invert_yaxis is True:
        plt.gca().invert_yaxis()

    plt.grid(grid)

    plt.title(title)
    plt.tight_layout()
    if ofilename is None:
        return plt
    plt.savefig(ofilename)
Example #28
        st.write(
            'We will transform our data because it is not stationary; we will apply the following transformation:'
        )
        st.image(Image.open('./imag/trans.png'), caption='Continuous returns')
        st.write(
            'After the transformation we will apply the adfuller test again.')
        dayly_returns(dataframes, option)
        st.write('P-Value ', TICK, 'Price: ',
                 adfuller(abs(dataframes['trans']))[1])
        U = arma1_fortrans(adfuller, dataframes)
        st.write(U)

        st.write(
            'Based on the following plots choose the p and q for the ARMA model'
        )
        lag_plot(dataframes[option])
        lag_acf = st.slider(
            'Choose the lags for the autocorrelation', 0, 10, 50)
        lag_pacf = st.slider(
            'Choose the lags for the partial autocorrelation', 0, 10,
            50)

        #plot_lags(lag_acf,lag_pacf,dataframes)
        o = 'trans'

        try:
            m11 = pplotacf(dataframes, lag_acf, o)
            plt.savefig('./imag/acf.png')
            image11 = Image.open('./imag/acf.png')
            st.image(image11, caption='')
    if valid['Predictions'][i] < valid['Predictions'][i + 1]:
        valid['Predicted'][i + 1] = "Increase"
    else:
        valid['Predicted'][i + 1] = "Decrease"

for i in range(1, len(valid)):
    if valid['Actual'][i] == valid['Predicted'][i]:
        valid['Recommendation'][i] = "Correct"
    else:
        valid['Recommendation'][i] = "Incorrect"

print(valid.tail(10))

# # Autocorrelation
plt.figure(figsize=(10, 10))
lag_plot(df['Close'], lag=5)
plt.title(f'{ticker} Autocorrelation plot')

train_data, test_data = df[0:int(len(df) * 0.8)], df[int(len(df) * 0.8):]
plt.figure(figsize=(16, 8))
plt.title(f'{ticker} Stock Price')
plt.xlabel('Dates')
plt.ylabel('Prices')
plt.plot(df['Close'], 'blue', label='Training Data')
plt.plot(test_data['Close'], 'green', label='Testing Data')

plt.legend()

# # Arima

Example #30
plt.figure(figsize=(24, 16))
ax = plt.axes()
ax = sns.heatmap(corr, ax = ax)
ax.set_title('Heatmap - Granger Causality Test Results')
plt.xlabel("Time Series")
plt.ylabel("Time Series")
plt.show()


# In[23]:


## lag plot

plt.figure()
lag_plot(datat)


# In[24]:


print(datat.shape)


# In[25]:


i = 10
datatp = datat.iloc[:, 0:i]

print(df.head())

df = df.set_index(df.columns[0])
print(df.head())

df['hh_sp'].plot(figsize = (14,6), grid=True)
plt.title('Henry Hub Spot Price')
plt.ylabel('Price(Dollars per Million Btu)')
plt.show()

"""We are dealing with discrete parameter process of time series."""

# Plot
fig, axes = plt.subplots(1, 4, figsize=(10,3), sharex=True, sharey=True, dpi=100)
for i, ax in enumerate(axes.flatten()[:4]):
    lag_plot(df, lag=i+1, ax=ax)
    ax.set_title('Lag ' + str(i+1))

fig.suptitle('Lag Plots of Natural Gas \n(Points get wide and scattered with increasing lag -> lesser correlation)\n', y=1.15)    


plt.show()

lag_plot(df)
plt.show()

"""Running the example plots the data (t) on the x-axis against the data on the previous day (t-1) on the y-axis. We can see a large ball of observations along a diagonal line of the plot. It clearly shows a relationship or some correlation."""

from pandas.plotting import autocorrelation_plot
autocorrelation_plot(df.hh_sp)
plt.show()
Example #32
def do_lag_plot(X):
    for i in range(4):
        plt.clf()
        lag_plot(X, lag=i + 1, c='firebrick')
        plt.savefig(folder_out + 'lag_plot_%02d.png' % i)
    return