Example #1
def csv_file_pick_rtt_series():
    with open(JSON2CSV_FILE_ALL) as f_handler:
        next(f_handler)  # skip the header line
        for line in f_handler:
            fields = line.split(';')
            dest = fields[1]
            probe = fields[2]
            # Keep only valid samples; -1 marks a failed RTT measurement.
            rtt_series_one_line = [float(rtt) for rtt in fields[3:] if float(rtt) != -1]

            if len(rtt_series_one_line) != 0:
                if ACTION == "periodicity":
                    plot_fft_autocorr(rtt_series_one_line, dest, probe)
                elif ACTION == "rtt_statistics":
                    rtt_statistics(rtt_series_one_line, dest, probe)
                elif ACTION == "autocorr_plot":
                    autocorrelation_plot(pd.Series(rtt_series_one_line))
                    plt.show()
            else:
                print(probe, GENERATE_TYPE, dest, "is an empty list")
def autocorr_plot(clusters, data, savedir=None):
    # Choose a near-square subplot grid large enough for every cluster.
    n = len(clusters)
    x = int(math.sqrt(n))
    y = 1
    while x * y < n:
        y += 1

    # Autocorrelation of all series on a single axis.
    plt.figure()
    autocorrelation_plot(data.T)
    savepath = '/'.join(savedir.split('/') + ['all_autocorr.png'])
    plt.savefig(savepath)
    plt.close()

    # One subplot per cluster, axes shared across the grid.
    fig, axarr = plt.subplots(x, y, sharex=True, sharey=True)
    i = 0
    j = 0
    for cid, cluster in clusters.items():
        ax = axarr[i, j]
        autocorrelation_plot(data.loc[cluster, :].T, ax=ax)
        i = (i + 1) % x
        if i == 0:
            j = (j + 1) % y

    savepath = '/'.join(savedir.split('/') + ['incluster_autocorr.png'])
    plt.savefig(savepath)
    plt.close()
Example #3
def autocorrelation(array, name):
    fig = plt.figure()
    autocorrelation_plot(array)
    plt.legend([name], loc='upper left')
    # plt.show()
    fig.savefig("Election_13/stats/" + name + ".png", bbox_inches='tight')
    plt.clf()
    fig.clf()
Example #4
def ts_plots(rets, figsize=(12, 10)):
    import matplotlib.pyplot as plt
    fig, axarr = plt.subplots(2, 2, sharex=False, sharey=False,
                              figsize=figsize)
    axgen = (ax for ax in np.array(axarr).ravel())

    rets.plot(kind='line', ax=next(axgen))           # raw series
    rets.plot(kind='hist', bins=50, ax=next(axgen))  # histogram
    lag_plot(rets, lag=1, ax=next(axgen))            # lag-1 scatter
    autocorrelation_plot(rets, ax=next(axgen))       # autocorrelation
Example #5
def process_trace(connection, name):
    print('Downloading...')
    timeSeries = connection.demand(name)
    print('complete')

    # Unpack the trace into parallel time/value arrays.
    time = np.zeros(len(timeSeries.elements))
    demand = np.zeros(len(timeSeries.elements))
    for i in range(len(timeSeries.elements)):
        time[i] = timeSeries.elements[i].timestamp
        demand[i] = timeSeries.elements[i].value

    print(len(demand))

    # Reference:
    # http://www.simafore.com/blog/bid/105815/Time-series-analysis-using-R-for-cost-forecasting-models-in-8-steps

    # Spectrum of the demand signal.
    t = np.arange(len(demand))
    sp = np.fft.fft(demand)
    freq = np.fft.fftfreq(t.shape[-1])

    from pandas.tools.plotting import autocorrelation_plot
    autocorrelation_plot(demand)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(freq, sp.real, freq, sp.imag)

    print('pandas')
    from pandas import Series
    s = Series(demand, index=range(len(demand)))
    corr = s.autocorr()  # lag-1 autocorrelation
    print(corr)

    plt.show()
Example #6
    def plot_autocorrelation(self):
        """
        Plots autocorrelation of power data.
        Reference:
        http://www.itl.nist.gov/div898/handbook/eda/section3/autocopl.htm

        Returns
        -------
        matplotlib.axes.Axes
        """
        fig, ax = plt.subplots()
        for power in self.power_series():
            autocorrelation_plot(power, ax=ax)
        return ax
    def test_autocorrelation_plot(self):
        from pandas.tools.plotting import autocorrelation_plot
        _check_plot_works(autocorrelation_plot, series=self.ts)
        _check_plot_works(autocorrelation_plot, series=self.ts.values)

        ax = autocorrelation_plot(self.ts, label='Test')
        self._check_legend_labels(ax, labels=['Test'])
    def test_autocorrelation_plot(self):
        from pandas.tools.plotting import autocorrelation_plot
        _check_plot_works(autocorrelation_plot, self.ts)
        _check_plot_works(autocorrelation_plot, self.ts.values)

        ax = autocorrelation_plot(self.ts, label='Test')
        t = ax.get_legend().get_texts()[0].get_text()
        self.assertEqual(t, 'Test')
Example #9
def plotAutocorrelation(pth, bucketName):
    df = pd.read_hdf(pth + bucketName, 'capitalKDF')
    autocorrelation_plot(df['A', 'p', '1'], ax=plt.subplot(2, 2, 1))
    plt.title("Autocorrelation plot for best ask price")
    autocorrelation_plot(df['A', 'v', '1'], ax=plt.subplot(2, 2, 2))
    plt.title("Autocorrelation plot for best ask volume")
    autocorrelation_plot(df['B', 'p', '1'], ax=plt.subplot(2, 2, 3))
    plt.title("Autocorrelation plot for best bid price")
    autocorrelation_plot(df['B', 'v', '1'], ax=plt.subplot(2, 2, 4))
    plt.title("Autocorrelation plot for best bid volume")
    plt.show()
Example #10
    def _plot(self):
        """_plot doc..."""

        data = pd.Series(np.asarray(self.data))

        pl = self.pl
        ax = pl.gca()
        pdPlot.autocorrelation_plot(data, ax=ax)

        if self.xScale != 1.0:
            formatter = FuncFormatter(self._scaleTickMark)
            ax.get_xaxis().set_major_formatter(formatter)

        pl.title(self.title)
        pl.xlabel(self.xLabel)
        pl.ylabel(self.yLabel)
        if self.xLimits:
            pl.xlim(*self.xLimits)
        if self.yLimits:
            pl.ylim(*self.yLimits)
        pl.grid(True)
def pandas_autocorr(chain, labels, plt_label, nburn=0):
    # nburn: number of burn-in samples to discard before plotting.

    from pandas.tools.plotting import autocorrelation_plot

    npars = chain.shape[1]
    cmap = get_cmap(npars)

    # plot autocorrelation lag for every parameter, one color each
    plt.figure(figsize=(16, 6))
    h = [autocorrelation_plot(chain[nburn:, i], color=cmap(i), lw=5, alpha=0.8, label=labels[i])
         for i in range(npars)]

    plt.legend(loc=1, fontsize=14, ncol=3, numpoints=1, markerscale=0.7)
    plt.tight_layout()
    plt.savefig('pandas_autocorrelation_' + plt_label + '.png', dpi=150)
    plt.close()
features_after_chapter_3 = list(set().union(basic_features, pca_features))
features_after_chapter_4 = list(set().union(basic_features, pca_features, time_features, freq_features))
features_after_chapter_5 = list(set().union(basic_features, pca_features, time_features, freq_features, cluster_features))

selected_features = ['temp_pattern_labelOnTable','labelOnTable', 'temp_pattern_labelOnTable(b)labelOnTable', 'cluster',
                     'pca_1_temp_mean_ws_120','pca_2_temp_mean_ws_120','pca_2','acc_watch_y_temp_mean_ws_120','gyr_watch_y_pse',
                     'gyr_watch_x_pse']
possible_feature_sets = [basic_features, features_after_chapter_3, features_after_chapter_4, features_after_chapter_5, selected_features]
feature_names = ['initial set', 'Chapter 3', 'Chapter 4', 'Chapter 5', 'Selected features']

# Let us first study whether the time series is stationary and what the autocorrelations are.

dftest = adfuller(dataset['hr_watch_rate'], autolag='AIC')
print(dftest)

autocorrelation_plot(dataset['hr_watch_rate'])
plot.show()
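
# adfuller returns an unlabeled tuple; a small added sketch (field order per the
# statsmodels docs, pandas assumed imported as pd) to print the result readably:
stat, pvalue, usedlag, nobs, crit, icbest = dftest
print(pd.Series({'Test Statistic': stat, 'p-value': pvalue,
                 '#Lags Used': usedlag, 'Observations Used': nobs}))
for level, value in crit.items():
    print('Critical Value (%s): %.3f' % (level, value))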

# Now let us focus on the learning part.

learner = TemporalRegressionAlgorithms()
eval = RegressionEvaluation()

# We repeat the experiment a number of times to get a bit more robust data as the initialization of the NN is random.

repeats = 5

# We set a washout time to give the NNs time to stabilize. We do not compute the error during the washout time.

washout_time = 10
Example #13
ftest['count'] = reg + cas
ftest['count'] = ftest['count'].clip(0, np.max(train['count']))
ftest[['count']].to_csv('submission-02.csv')


# ## Exploring Autocorrelation

# In[73]:

import matplotlib.pyplot as plt
from pandas.tools.plotting import autocorrelation_plot

# Registered & Casual on Workingdays
fig, axes = plt.subplots(ncols=3, nrows=8, figsize=(16, 16))
for h in range(24):
    autocorrelation_plot(train.registered[(train.hour == h) & (train.workingday == 1)], ax=axes[int(h / 3.0)][h % 3], color='c')
    autocorrelation_plot(train.casual[(train.hour == h) & (train.workingday == 1)], ax=axes[int(h / 3.0)][h % 3], color='m')


# In[72]:

import matplotlib.pyplot as plt
from pandas.tools.plotting import autocorrelation_plot

# Registered & Casual on Non-Workingdays
fig, axes = plt.subplots(ncols=3, nrows=8, figsize=(16, 16))
for h in range(24):
    autocorrelation_plot(train.registered[(train.hour == h) & (train.workingday == 0)], ax=axes[int(h / 3.0)][h % 3], color='c')
    autocorrelation_plot(train.casual[(train.hour == h) & (train.workingday == 0)], ax=axes[int(h / 3.0)][h % 3], color='m')

def gen_cluster_plots(cluster_directory_root, depth):
    # load data
    gc, mt, track = load_data(None, 0)
    data = pd.concat([gc.data, mt.data])

    # Duplicate every series under '+' and '-' suffixed labels so that signed
    # cluster members can be looked up directly.
    labels = data.index.values
    pos_labels = labels + '+'
    neg_labels = labels + '-'
    pos_data = pd.DataFrame(data=data.values, index=pos_labels,
                            columns=data.columns.values)
    neg_data = pd.DataFrame(data=data.values, index=neg_labels,
                            columns=data.columns.values)

    data = pd.concat([data, pos_data, neg_data])

    # Find every clustering directory `depth` levels below the root.
    generic_dir = '/'.join(cluster_directory_root.split('/') + (['*'] * depth))
    cluster_directories = glob.glob(generic_dir)

    clusterings = {}
    clusterings_models = {}
    for cluster_dir in cluster_directories:
        try:
            clustering_id = cluster_dir.split('/')[-1]
            # read final clusters; assignments.txt stores 4-line records:
            # cluster name, tab-separated members, then two lines that are skipped
            clusters = {}
            filepath = '/'.join(cluster_dir.split('/') + ['assignments.txt'])
            lines = open(filepath, 'r').read().splitlines()
            l = 0
            while l < len(lines):
                cluster_name = lines[l]
                cluster_members = lines[l + 1].split('\t')
                clusters[cluster_name] = cluster_members
                l += 4

            clusterings[clustering_id] = clusters

            # load models
            models = {}
            model_files = glob.glob(cluster_dir + '/*')
            for model_file in model_files:
                try:
                    model_id = model_file.split('/')[-1]
                    model_json = open(model_file).read()
                    models[model_id] = HiddenMarkovModel.from_json(model_json)
                    print('model loaded from: ', model_file)
                except Exception:
                    pass
            clusterings_models[clustering_id] = models
        except Exception:
            pass

    # Union of all cluster members across clusterings.
    background = set()
    for clustering in clusterings.values():
        for cid, members in clustering.items():
            background.update(set(members))

    background = list(background)
    # data = data.loc[background, :]

    # Generate line, autocorrelation and lag plots for each cluster.
    for clustering_id, clustering in clusterings.items():
        for model_id, members in clustering.items():
            sequences = data.loc[members, :]
            pltdir = '/'.join(cluster_directory_root.split('/') + ['plots'])

            # make line plots directory
            if not os.path.isdir(pltdir + '/line'):
                print("Creating directory...", pltdir)
                os.mkdir(pltdir + '/line')

            savename = pltdir + '/line/' + model_id + '_lineplot'

            plt_title = model_id + ' Line Plot'
            ax = sequences.T.plot(legend=False, rot=2)
            ax.set_title(plt_title)
            ax.set_xlabel('Timepoint')
            ax.set_ylabel('Normalized Expression')

            print('Saving: ', savename)
            fig = ax.get_figure()
            fig.savefig(savename)
            fig.clear()

            # make autocorr plots directory
            if not os.path.isdir(pltdir + '/autocorr'):
                print("Creating directory...", pltdir)
                os.mkdir(pltdir + '/autocorr')

            savename = pltdir + '/autocorr/' + model_id + '_autocorr'

            plt_title = model_id + ' Autocorr Plot'
            for seq in sequences.index:
                ax = autocorrelation_plot(sequences.loc[seq])
            ax.set_title(plt_title)

            print('Saving: ', savename)
            fig = ax.get_figure()
            fig.savefig(savename)
            fig.clear()

            # make lag plots directory
            if not os.path.isdir(pltdir + '/lag'):
                print("Creating directory...", pltdir)
                os.mkdir(pltdir + '/lag')

            # One distinct color per cluster member.
            from pylab import get_cmap
            NUM_COLORS = len(members)
            cm = get_cmap('gist_rainbow')
            colors = [cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)]

            savename = pltdir + '/lag/' + model_id + '_lagplot'

            plt_title = model_id + ' Lag Plot'
            for i, seq in enumerate(sequences.index):
                ax = lag_plot(sequences.loc[seq], c=colors[i])
            ax.set_title(plt_title)

            print('Saving: ', savename)
            fig = ax.get_figure()
            fig.savefig(savename)
            fig.clear()
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from scipy.interpolate import spline
from pandas.tools.plotting import autocorrelation_plot
from statsmodels.tsa.arima_model import ARIMA
from scipy.stats import gaussian_kde
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose

def norm(x):
    return (x-np.min(x))/(np.max(x)-np.min(x))

dataframe = pd.read_csv('Chaotic_TimeSeries_turkey_elec.csv')
dataframe.head()
plt.plot(dataframe)
autocorrelation_plot(dataframe.iloc[:, 0])

### EVALUATE V3 LINES
model00 = ARIMA(np.array(dataframe.iloc[:, 0]), dates=None, order=(2, 1, 0))
model11 = model00.fit(disp=1)
model11.summary()
model11.forecast()
resid9 = model11.resid
np.mean(abs(resid9)) / max(np.array(dataframe.iloc[:, 0]))

x3 = resid9
x3 = x3[np.logical_not(np.isnan(x3))]
dftest13 = adfuller(x3, autolag='AIC')
dfoutput1 = pd.Series(dftest13[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
print('Dickey Fuller Test:\n', dfoutput1)
Example #16
from pandas import DataFrame
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error

# Read in the data
data = pd.read_csv("Budget_test.csv", index_col=0)

# print(data.head())
data.index = pd.to_datetime(data.index)
data.columns = ['WRVU Production']

plt.plot(data)
plt.ylabel('wrvus')
plt.show()

autocorrelation_plot(data)
plt.show()

model = ARIMA(data, order=(5, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())

residuals = DataFrame(model_fit.resid)
residuals.plot()
plt.show()
residuals.plot(kind='kde')
plt.show()
print(residuals.describe())

x = data.values
size = int(len(x) * 0.5)
import numpy as np
import pandas as pd
import sys
from datetime import datetime as dt
import matplotlib.pyplot as plt
from pandas.tools.plotting import lag_plot
from pandas.tools.plotting import autocorrelation_plot
 
to_date = lambda x: dt.strptime(x, "%Y%m%d").toordinal()
 
dates, avg_temp = np.loadtxt(sys.argv[1], delimiter=',', usecols=(1, 11), unpack=True, converters={1: to_date})
dtidx = pd.DatetimeIndex([dt.fromordinal(int(date)) for date in dates])
data = pd.Series(avg_temp * .1, index=dtidx)
 
fig = plt.figure()
fig.add_subplot(211)
lag_plot(data)
 
plt.figure()
autocorrelation_plot(data)
 
plt.figure()
resampled = data.resample('A').mean()  # annual means
resampled.plot()
plt.show()
line = np.linspace(0, 336, 336)
plt.plot(line, label[0:336])
plt.xlabel('Hour')
plt.ylabel('Power Demand')
plt.title('Power Demand of first 14 days')
plt.show()
lag_plot(label)

# Plotting the lag plot of target feature
plt.title('Lag plot of Power Demand')
plt.xlabel('P(t)')
plt.ylabel('P(t+1)')
plt.show()

# Plotting auto-correlation
autocorrelation_plot(label[0:1000])
plt.show()

# Splitting train and test data
train_data, test_data = data[0:119832], data[119832:]
train_label, test_label = label[0:119832], label[119832:]

# Implementing Persistence Model
df = pd.concat([label.shift(48), label], axis=1)
df.columns = ['t-1', 't+1']
X = df.values
train, test = X[0:119832], X[127656:]
train_X, train_y = train[:, 0], train[:, 1]
test_X, test_y = test[:, 0], test[:, 1]
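
# The example stops before the persistence forecast itself; a minimal sketch of
# the usual next step (assumes the train/test arrays built above).
import numpy as np
from sklearn.metrics import mean_squared_error

predictions = test_X  # persistence: predict t+1 as the value seen 48 steps back
rmse = np.sqrt(mean_squared_error(test_y, predictions))
print('Persistence RMSE: %.3f' % rmse)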

Example #19
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from datetime import datetime
from pandas.tools.plotting import autocorrelation_plot
from mpi4py import MPI

dateparse = lambda x: datetime.strptime(x, '%Y-%m-%d')

file = sys.argv[1]
jenis = sys.argv[2]
data = pd.read_csv(file, index_col='tanggal', date_parser=dateparse)

dt = np.log(data[jenis])

dt.plot(label='Observed ' + jenis + ' data')
plt.savefig('grafik_' + jenis + '.png', transparent=False)
autocorrelation_plot(dt)
plt.savefig('grafik_autocorelation_' + jenis + '.png', transparent=False)
sm.graphics.tsa.plot_acf(dt, lags=40)
plt.savefig('grafik_acf_' + jenis + '.png', transparent=False)
sm.graphics.tsa.plot_pacf(dt, lags=40)
plt.savefig('grafik_pacf_' + jenis + '.png', transparent=False)
#ts.adfuller(dt, 1)

# Fit and report on the root rank only; arima_mod1 exists only there.
if MPI.COMM_WORLD.Get_rank() == 0:
    arima_mod1 = sm.tsa.ARIMA(dt, (3, 0, 2)).fit(trend='nc', disp=False)
    print(arima_mod1.params)
    sm.stats.durbin_watson(arima_mod1.resid.values)
    #ws.to_csv("Arima_resid"+jenis+".csv")
    print(arima_mod1.aic)
    print(arima_mod1.bic)
    #print("HQIC: " + arima_mod1.hqic)
Example #20
If serial correlation remains in the error term, factors not captured by the model, including trends, may be exerting a large influence, so it is better to control for them by adding plausible explanatory variables, adding a time trend or lag terms, or taking differences.

For checking this kind of serial correlation, it is also effective to confirm the stationarity of the error term with an ADF test.

"""




# ADF test, H0: Non-stationary
tsa.adfuller(rlt.resid, regression='nc')


# Autocorrelation plot of residuals
autocorrelation_plot(rlt.resid)  # show the ACF of the residuals
ACF_resid = tsa.acf(rlt.resid)   # keep the ACF of the residuals

"""
If the error term is stationary, you are guaranteed a stable relationship between the explanatory variables and the dependent variable (one that returns even after temporary deviations). Moreover, since many economic variables are non-stationary to begin with, stationary residuals also make it less likely that an important factor has been omitted from the model.

Besides serial correlation, the other important check is for multicollinearity. It arises when the explanatory variables are strongly correlated with one another, and it is troublesome because it can flip the signs of the estimated coefficients.

Compute the VIF statistics as below; if they do not greatly exceed 10, you can consider the model safe for now. Even without judging mechanically from the VIF, you will notice multicollinearity empirically when an explanatory variable shows up with the opposite of its expected sign. That said, about the only remedy is to remove one of the strongly correlated explanatory variables.

Approaches that constrain the parameter space, such as ridge regression, sacrifice the unbiasedness of the parameters to begin with and come with no guarantee of resolving multicollinearity, so they are not recommended in econometrics.

"""


# Checking multicollinearity by VIF
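# The snippet ends before the computation itself; a minimal sketch of the VIF
# check described above (assumes the regression design matrix is available as a
# pandas DataFrame X; the name X is illustrative).
import numpy as np
import pandas as pd
from statsmodels.stats.outliers_influence import variance_inflation_factor

def vif_table(X):
    # One VIF per explanatory variable; values well above 10 flag multicollinearity.
    exog = np.asarray(X, dtype=float)
    return pd.Series([variance_inflation_factor(exog, i) for i in range(exog.shape[1])],
                     index=X.columns, name='VIF')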
Example #21
]

possible_feature_sets = [
    basic_features, features_after_chapter_3, features_after_chapter_4,
    features_after_chapter_5, selected_features
]
feature_names = [
    'initial set', 'Chapter 3', 'Chapter 4', 'Chapter 5', 'Selected features'
]

# Let us first study whether the time series is stationary and what the autocorrelations are.

dftest = adfuller(dataset['acc_phone_x'], autolag='AIC')
print(dftest)

autocorrelation_plot(dataset['acc_phone_x'])
plot.show()

# Now let us focus on the learning part.

learner = TemporalRegressionAlgorithms()
eval = RegressionEvaluation()

# We repeat the experiment a number of times to get a bit more robust data as the initialization of the NN is random.

repeats = 5

# We set a washout time to give the NNs time to stabilize. We do not compute the error during the washout time.

washout_time = 10
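
# A minimal added sketch of scoring only after the washout period (y_true and
# y_pred are illustrative names for aligned target and prediction arrays):
import numpy as np

def mse_after_washout(y_true, y_pred, washout_time):
    # Skip the first washout_time samples while the network stabilizes.
    err = np.asarray(y_true)[washout_time:] - np.asarray(y_pred)[washout_time:]
    return float(np.mean(err ** 2))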
Example #22
from datetime import datetime
from pandas import read_csv, DataFrame
from matplotlib import pyplot
from pandas.tools.plotting import autocorrelation_plot
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error


def parser(x):
    return datetime.strptime('190' + x, '%Y-%m')


series = read_csv('/Home/Downloads/sales-of-shampoo-over-a-three-ye.csv',
                  header=0,
                  parse_dates=[0],
                  index_col=0,
                  squeeze=True,
                  date_parser=parser)

autocorrelation_plot(series)
# fit model
model = ARIMA(series, order=(5, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())
# plot residual errors
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe())

X = series.values
size = int(len(X) * 0.66)
train, test = X[0:size], X[size:len(X)]
loan_count_summary = year_month_summary['issue_d']
loan_count_summary.to_csv("LoanStatsGrouped.csv", index=True)  # save so we can reload it more quickly later
#loan_count_summary = pd.read_csv('LoanStatsGrouped.csv')

# What do we really care about? Just the y values
y = loan_count_summary.values

plt.plot(y)
plt.suptitle("the values")
plt.show()

# Test if the time series is stationary
# Here I make an autocorrelation plot of the data. The decay with the lag indicates that the TS is not stationary.
import statsmodels.api as sm
from pandas.tools.plotting import autocorrelation_plot
autocorrelation_plot(y)
plt.suptitle("Original series")
plt.savefig("TS.pdf")
#plt.show()

# #### Dickey Fuller test
# I can also perform a Dickey Fuller test for presence of unit roots
test = sm.tsa.adfuller(y)
print('adf: ', test[0])
print('p-value: ', test[1])
print('Critical values: ', test[4])
if test[0] > test[4]['10%']:
    print('has unit roots, the series is not stationary')
else:
    print('has no unit roots, the series is stationary')
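
# If the series is non-stationary, first differencing is the usual remedy; a
# minimal added sketch (pandas assumed imported as pd, as in the surrounding code):
y_diff = pd.Series(y).diff().dropna()
test_diff = sm.tsa.adfuller(y_diff.values)
print('adf after differencing: ', test_diff[0])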
f, axarr = plt.subplots(2)
axarr[0].plot(nord16.resample('D').mean(), lw = 2)
axarr[1].plot(nord15.resample('D').mean(), color = 'red', lw = 2)

f, axarr = plt.subplots(2)
axarr[0].plot(nord16.resample('D').std(), lw = 2)
axarr[1].plot(nord15.resample('D').std(), color = 'red', lw = 2)

var_nord16 = np.array(nord16.resample('D').std()).ravel()
var_nord15 = np.array(nord15.resample('D').std()).ravel()

plt.figure()
plt.hist(np.array(var_nord16), bins = 20)
plt.figure()
plotting.autocorrelation_plot(pd.Series(var_nord16))
plt.figure()
plotting.autocorrelation_plot(pd.Series(np.random.sample(size = len(var_nord16))))

d16nord = DistBetweenZeroVarDays(var_nord16)
d15nord = DistBetweenZeroVarDays(var_nord15)

plt.figure()
plt.hist(np.array(d16nord))
plt.figure()
plt.hist(np.array(d15nord))

np.mean(d16nord)
np.mean(d15nord)
np.std(d16nord)
np.std(d15nord)
# converts string to datetime object in pandas:
df['issue_d_format'] = pd.to_datetime(df['issue_d']) 
dfts = df.set_index('issue_d_format') 
year_month_summary = dfts.groupby(lambda x : x.year * 100 + x.month).count()
loan_count_summary = year_month_summary['issue_d']


# #### Test if the time series is stationary
# Here I make an autocorrelation plot of the data. The decay with the lag indicates that the TS is not stationary.

# In[24]:

import statsmodels.api as sm
from pandas.tools.plotting import autocorrelation_plot
autocorrelation_plot(loan_count_summary)


# #### Dickey Fuller test
# I can also perform a Dickey Fuller test for presence of unit roots

# In[25]:

test = sm.tsa.adfuller(loan_count_summary.values)
print('adf: ', test[0])
print('p-value: ', test[1])
print('Critical values: ', test[4])
if test[0] > test[4]['10%']:
    print('has unit roots, the series is not stationary')
else:
    print('has no unit roots, the series is stationary')
Example #26
data_bit = []
for i in range(tsbtc.size - 1):
    xy = np.array([tsbtc.iloc[i], tsbtc.iloc[i + 1]])
    data_bit.append(xy)

dataset = np.array(data_bit)

H, xedges, yedges = np.histogram2d(dataset[:, 0], dataset[:, 1], bins=20, density=True)

plt.figure()
im = plt.imshow(H, interpolation='nearest', origin='lower', extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])

plt.figure()
plt.plot(statsmodels.api.tsa.acf(tsbtc))
plt.figure()
plotting.autocorrelation_plot(tsbtc.iloc[1340:])

reversed_arr = np.fliplr([np.array(tsbtc)])[0]
plt.figure()
plotting.autocorrelation_plot(reversed_arr)
####################################################################################################
def get_dataset(ts):
    data_bit = []
    for i in range(ts.size - 1):
        xy = np.array([ts.iloc[i], ts.iloc[i + 1]])
        data_bit.append(xy)
    dataset = np.array(data_bit)
    return dataset
####################################################################################################
def find_closest_index(edges, x):
    diffs = np.abs(edges - x)
    return int(np.argmin(diffs))  # index of the edge nearest to x
Example #27
pun.append(data3['PUN [€/MWH]'].dropna().values.ravel())

unlisted =  [item for sublist in pun for item in sublist]

df = pd.DataFrame(unlisted)
df = df.set_index(pd.date_range('2014-01-01', '2016-12-14', freq = 'H')[:df.shape[0]])

df.plot()
df.resample('D').mean().plot()
df.resample('M').mean().plot()

plt.figure()
plotting.lag_plot(df.resample('M').mean())

plt.figure()
plotting.autocorrelation_plot(df)
plt.figure()
plotting.autocorrelation_plot(df.resample('D').mean())

plt.figure()
plotting.autocorrelation_plot(df.loc[df.index.year == 2014].resample('D').mean())
plt.figure()
plotting.autocorrelation_plot(df.loc[df.index.year == 2015].resample('D').mean())
plt.figure()
plotting.autocorrelation_plot(df.loc[df.index.year == 2016].resample('D').mean())

plt.figure()
plotting.lag_plot(df.loc[df.index.year == 2014])
plt.figure()
plotting.lag_plot(df.loc[df.index.year == 2015], color='red')
plt.figure()
Example #28
pyplot.show()

pyplot.plot(merged_dataframe['Date'], merged_dataframe['Close'])
pyplot.xlabel('Years')
pyplot.ylabel('Stock Closing Prices')
pyplot.show()

merged_dataframe.info()

merged_dataframe['Date'] = merged_dataframe['Date'].dt.strftime('%Y-%m')

df_dateclose = pd.Series(merged_dataframe['Close'].values,
                         index=merged_dataframe['Date'])
print(df_dateclose.index)
print(df_dateclose.head())
autocorrelation_plot(df_dateclose)
pyplot.show()

merged_dataframe['Close'].head()

plot_acf(df_dateclose)
plot_pacf(df_dateclose)

plot_pacf(df_dateclose, lags=50)

plot_acf(df_dateclose)
plot_acf(df_dateclose, lags=50)

arima_df = pd.DataFrame(merged_dataframe, index=merged_dataframe['Date'])

arima_df.index
Example #29
import Fourier

reconstructed = Fourier.fourierExtrapolation(dpun, 0, 16)

plt.figure()
plt.plot(dpun)
plt.plot(reconstructed, color = 'red')

np.mean(dpun - reconstructed)
np.std(dpun - reconstructed)

from pandas.tools import plotting

plt.figure()
plotting.lag_plot(pd.DataFrame(dpun))

plt.figure()
plt.plot(statsmodels.api.tsa.acf(dpun))

lags = []
for i in range(dpun.size - 1):
    lags.append(np.array([dpun[i], dpun[i+1]]))
    
lags = pd.DataFrame(lags)
lags.corr()

plt.figure()
plotting.lag_plot(pd.DataFrame(dpun), lag = 7)
plt.figure()
plotting.autocorrelation_plot(pd.DataFrame(dpun))
def autocorrelation(building, floor, group=None, start=_start, end=_end):
    floor_data = get_series(building, group=group, start=start, end=end)
    series = floor_data[floor]
    autocorrelation_plot(series)
name = '$R_0$ development through time for windowsize '+str(ws)
plt.title(name)
plt.savefig(name)  

plt.clf()
ax = plt.subplot()
m.boxplot(ax=ax, rot=90)
name = 'Spread of estimated parameters $p_{ij}$ for windowsize '+str(ws)
plt.autoscale(tight=True)
plt.title(name)
plt.savefig(name)

plt.clf()
ax = plt.subplot()
for i in range(36):
    autocorrelation_plot(m.iloc[i,:], ax=ax)#,label=str(m.columns[i]))
name = 'Autocorrelation plot of the parameter estimates for window size '+str(ws)
plt.title(name)
plt.savefig(name)
#plt.legend()

plt.clf()
ax = plt.subplot()
for i in range(6):
    autocorrelation_plot(r.iloc[i,:], ax=ax)#,label=str(m.columns[i]))
name = 'Autocorrelation plot of the $R_0$ for window size '+str(ws)
plt.title(name)
plt.savefig(name)

plt.clf()
diffs = diff(r)
plt.plot(res_ma.window_size, res_ma.MAPE)
plt.show()

# EWMA

fig= plt.figure(figsize=(12,9))
plt.plot(ts.index, ts.rings, label = 'Ring size', alpha = 0.5)
for i in  np.linspace(1, 0.0001,10):
    plt.plot(ts.index, ts.rings.ewm(alpha = i).mean() + i* 10 , label = 'EWMA: %s'% i)
plt.xlim(xmin = min(ts.index)-1, xmax = max(ts.index) +1 )
plt.legend(loc='best')

# Autocorrelation
from pandas.tools.plotting import autocorrelation_plot

autocorrelation_plot(ts.rings)

# Load dow jones
ts = pd.read_csv('../data/Dow-Jones.csv', parse_dates=['Date'], index_col='Date', infer_datetime_format = True)
ts = ts[:'2010-01-01']

autocorrelation_plot(ts.Value)

# Avg temp
ts = pd.read_csv('../data/mean-daily-temperature.csv', parse_dates=['date'], index_col='date', infer_datetime_format = True)

autocorrelation_plot(ts.temp)

# PACF
import statsmodels.api as sm
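
# The snippet cuts off after the PACF heading; a sketch of the usual
# continuation with statsmodels (assumes the temperature series ts from above):
import matplotlib.pyplot as plt
sm.graphics.tsa.plot_pacf(ts.temp, lags=40)
plt.show()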
def predict_arma(ad_group, pred_date):
    warnings.filterwarnings("ignore")
    ads_file = 'data/ad_table.csv'
    df = pd.read_csv(ads_file, header=0, sep=',')
    df['date'] = pd.to_datetime(df['date'], infer_datetime_format=True)
    best_aic = np.inf
    best_order = None
    best_mdl = None
    max_lag = 30
    tuning_result = {}
    #     list_ad_group = set(df['ad'].values)
    if (ad_group in df['ad'].unique()):
        df_ad_group_train = df[df['ad'] == ad_group]
        df_ad_group_train = df_ad_group_train.reset_index()
        df_arma_train = df_ad_group_train[['shown', 'date']]
        series_train = pd.Series(df_arma_train['shown'],
                                 index=df_arma_train.index)
        for alpha in range(5):
            for beta in range(5):
                try:
                    tmp_mdl = ARMA(series_train.values,
                                   order=(alpha, beta)).fit(method='mle',
                                                            trend='nc')
                    tmp_aic = tmp_mdl.aic
                    if tmp_aic < best_aic:
                        best_aic = tmp_aic
                        best_order = (alpha, beta)
                        best_mdl = tmp_mdl
                except:
                    continue
        score, pvalue, _, _ = jarque_bera(best_mdl.resid)

        if pvalue < 0.10:
            print('The residuals may not be normally distributed.')
        else:
            print('The residuals seem normally distributed.')
        tuning_result = (best_aic, best_order)
        print('Ad_group: {} aic: {:6.2f} | best order: {}'.format(
            ad_group, best_aic, best_order))

        df_ad_group_train['time_period'] = (
            df_ad_group_train['date'] - df_ad_group_train['date'][0]).dt.days
        X = df_ad_group_train[['time_period']].values
        y = df_ad_group_train['shown'].values
        series_train.plot(title='Shown values trend', color='C1')
        plt.ylabel('shown values')
        plt.xlabel('Days gap from 2015-10-01')
        plt.scatter(X, y, facecolor='gray', edgecolors='none')
        plt.show()
        #check for auto correlation
        lag_plot(series_train)
        plt.show()
        autocorrelation_plot(series_train)
        plt.show()
        plot_acf(series_train.values, lags=max_lag)
        plt.show()

        data = series_train.values
        data = data.astype('float32')
        model = ARMA(data, order=best_order)
        #         model_fit = model.fit(transparams=False)
        try:
            model_fit = model.fit(transparams=False)
            model_fit.plot_predict(plot_insample=True)
            plt.scatter(X, y, color='gray')
            plt.title('ARMA')
            plt.show()
            days_gap = (pd.to_datetime(pred_date) -
                        df_arma_train['date'][0]).days
            forecast = model_fit.forecast(steps=days_gap)

            print('Prediction of shown value for', pred_date, '=')
            print(forecast[0][0])
        except ValueError:
            print('This data is not suitable for ARMA')
    else:
        print("Ad group does not exist")
### for bootstrap: do I sample only from the past year, same month?


for i in range(12):
    for j in range(5):
        print("difference between {} and {} for month {}".format(bymonth2.columns[j+1], bymonth2.columns[j],
              bymonth2.index[i]))
        print(bymonth2[bymonth2.columns[j+1]].iloc[i] - bymonth2[bymonth2.columns[j]].iloc[i])


from pandas.tools.plotting import autocorrelation_plot

for i in range(12):
    plt.figure()
    autocorrelation_plot(bymonth2.iloc[i])

### hour-by-hour
letters = 'abcdefghijklmnopqrstuvwxyz'
lets = []
for i in range(rng.size):
    lets.append(letters[rng[i].hour])

letters_dict = {'Letters': lets, 'csud': pun}
hdf = pd.DataFrame(letters_dict).set_index(rng)

hourwise= {}

for i in range(24):
    letter = letters[i]
    hour = hdf.loc[hdf['Letters'] == letter]
ax.set_ylabel("Y Label")
ax.set_zlabel("Z Label")
ax.set_xticks(df.finalDate[x].apply(lambda d: d.strftime("%Y-%m-%d")).values)
plt.show()


#################Several plots in 2D
df.plot(subplots=True)
df.plot(x="finalDate", y="finalBalance")
df.hist(by=["windowSize", "trainingSize"])
df.boxplot("finalBalance", by=["windowSize", "trainingSize"])
scatter_matrix(df, alpha=0.2, diagonal="kde")
df.plot(x="finalDate", y="finalBalance", kind="kde")

parallel_coordinates(df, "windowSize")
autocorrelation_plot(df.finalBalance)
radviz(df, "finalBalance")
df.plot(colormap="jet")

#################More specific plots in 2D
f, (ax1, ax2) = plt.subplots(2, 3)
ax1[0].plot(df.groupby(["windowSize"]).mean()["finalBalance"])
ax1[0].set_title("Window Size Mean")
ax1[0].set_ylim((5000, 15000))

ax2[0].plot(df.groupby(["windowSize"]).sum()["finalBalance"])
ax2[0].set_title("Window Size Sum")

ax1[1].plot(df.groupby(["trainingSize"]).mean()["finalBalance"])
ax1[1].set_title("Training Size Mean")
ax1[1].set_ylim((5000, 15000))
from matplotlib import pyplot
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error


from pandas.tools.plotting import autocorrelation_plot

def parser(x):
	return datetime.strptime('190'+x, '%Y-%m')

series = read_csv('shampoo-sales.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
print(series.head())
series.plot()
pyplot.show()

autocorrelation_plot(series)
pyplot.show()


# fit model
model = ARIMA(series, order=(5,1,0))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# plot residual errors
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()

residuals.plot(kind='kde')
pyplot.show()