コード例 #1
0
def lagPlot(ySeries, plotName="plot", outputFile="output.png"):
    """Draw a lag plot of ``ySeries``, save it to a file and display it.

    Parameters
    ----------
    ySeries : values to plot; they are wrapped in a ``pandas.Series``.
    plotName : title set on the figure.
    outputFile : path handed to ``plt.savefig``.  Defaults to
        "output.png", the file name that used to be hard-coded.
    """
    # The body previously mixed tab and space indentation, which Python 3
    # rejects with a TabError; it is now indented consistently.
    plt.figure()
    plt.title(plotName)
    data = pandas.Series(ySeries)
    lag_plot(data, marker='2', color='green')
    plt.savefig(outputFile)
    plt.show()
コード例 #2
0
ファイル: timeseries.py プロジェクト: orazaro/kgml
def ts_plots(rets, figsize=(12, 10)):
    """Draw a 2x2 overview of a series: line, histogram, lag and autocorrelation.

    Parameters
    ----------
    rets : object whose ``.plot`` method accepts ``kind`` and ``ax`` (it is
        also passed to ``lag_plot`` and ``autocorrelation_plot``).
    figsize : figure size forwarded to ``plt.subplots``.
    """
    import matplotlib.pyplot as plt
    fig, axarr = plt.subplots(2, 2, sharex=False, sharey=False,
                              figsize=figsize)
    # Hand out the four axes one by one.  The builtin next() works on both
    # Python 2.6+ and Python 3, unlike the Python-2-only ``.next()`` method.
    axgen = iter(np.array(axarr).ravel())

    rets.plot(kind='line', ax=next(axgen))
    rets.plot(kind='hist', bins=50, ax=next(axgen))
    lag_plot(rets, lag=1, ax=next(axgen))
    autocorrelation_plot(rets, ax=next(axgen))
コード例 #3
0
def TestIndipendence(st, zona):
    """Plot sample histograms and lag plots of the zonal imbalance sign.

    Rows of ``st`` whose 'CODICE RUC' equals ``'UC_DP1608_' + zona`` are
    selected.  Two random samples of 50 rows of the
    'SEGNO SBILANCIAMENTO AGGREGATO ZONALE' column are shown as histograms,
    first for the selected rows and then for their daily means (the second
    column of ``st`` is parsed as the timestamp).  Finally, lag plots of the
    daily means are drawn for lags 1, 2, 5, 10 and 30.

    Parameters
    ----------
    st : DataFrame containing the columns named above.
    zona : string appended to 'UC_DP1608_' to build the code to select.

    Returns
    -------
    Always 0.
    """
    sign_col = 'SEGNO SBILANCIAMENTO AGGREGATO ZONALE'

    cnlist = (st[['CODICE RUC']].values == 'UC_DP1608_'+zona).ravel().tolist()
    # Boolean-mask selection; .loc/.iloc replace the removed .ix indexer.
    cnor = st.loc[cnlist].reset_index(drop=True)

    signs = cnor[[sign_col]]
    # First sample: 50 random positions.  Second sample: 50 positions drawn
    # from those the first sample did not hit.
    d = np.random.randint(cnor.shape[0], size=50)
    rem = set(range(cnor.shape[0])).difference(d)
    sam1 = signs.iloc[d]
    sam2 = signs.iloc[np.random.choice(list(rem), size=50)]
    for label, sample in (('sample 1', sam1), ('sample 2', sam2)):
        plt.figure()
        plt.hist(sample.values.ravel())
        plt.title(label)

    # Index the rows by the timestamps parsed from the second column.
    si = [dateutil.parser.parse(stamp) for stamp in cnor[cnor.columns[1]]]
    CN = cnor.set_index(pd.to_datetime(si))

    sm = CN[[sign_col]].resample('D').mean()

    # Same two-sample comparison on the daily means; missing days are dropped.
    d = np.random.randint(sm.shape[0], size=50)
    rem = set(range(sm.shape[0])).difference(d)
    sam1 = sm.iloc[d].dropna()
    sam2 = sm.iloc[np.random.choice(list(rem), size=50)].dropna()
    for label, sample in (('sample 1', sam1), ('sample 2', sam2)):
        plt.figure()
        plt.hist(sample.values.ravel())
        plt.title(label)

    for lag in (1, 2, 5, 10, 30):
        plt.figure()
        plotting.lag_plot(sm, lag=lag)
        plt.title('lag = %d' % lag)

    return 0
コード例 #4
0
    def plot_lag(self, lag=1, ax=None):
        """
        Draw a lag plot for each series yielded by ``self.power_series()``.

        Background on lag plots:
        http://www.itl.nist.gov/div898/handbook/eda/section3/lagplot.htm

        Parameters
        ----------
        lag : forwarded to ``lag_plot`` as its second positional argument.
        ax : axes to draw on; when None, ``plt.gca()`` is used.

        Returns
        -------
        The axes object that every series was drawn on.
        """
        target_ax = plt.gca() if ax is None else ax
        for series in self.power_series():
            lag_plot(series, lag, ax=target_ax)
        return target_ax
コード例 #5
0
ファイル: electric.py プロジェクト: aagaard/nilmtk
    def plot_lag(self, lag=1, ax=None):
        """
        Draw a lag plot for each series yielded by ``self.power_series()``.

        Background on lag plots:
        http://www.itl.nist.gov/div898/handbook/eda/section3/lagplot.htm

        Parameters
        ----------
        lag : forwarded to ``lag_plot`` as its second positional argument.
        ax : axes to draw on; when None, ``plt.gca()`` is used.

        Returns
        -------
        The axes object that every series was drawn on.
        """
        target_ax = plt.gca() if ax is None else ax
        for series in self.power_series():
            lag_plot(series, lag, ax=target_ax)
        return target_ax
コード例 #6
0
ファイル: timeseries.py プロジェクト: yuqli/spark-
def my_lag_plot(data,
                filename=None,
                title=None,
                xlabel="Lag time(s)",
                ylabel="Time(s)",
                lag=1):
    """Generates a lag plot.
    Arguments:
    data -- list of data points
    Keyword arguments:
    filename -- filename to write graph to (None plots to screen)
    title -- graph title (if None, "Lag plot" is used for lag 1 and
             "Lag %d plot" % lag for any other lag)
    xlabel -- label on x-axis
    ylabel -- label on y-axis
    lag -- which lag to plot (default 1, the value previously always used)
    """
    if title is None:
        # Lag 1 keeps the historical default title.
        title = "Lag plot" if lag == 1 else "Lag %d plot" % lag
    plt.cla()
    lag_plot(data, lag=lag)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    if filename is not None:
        plt.savefig(filename)
    else:
        plt.show()
コード例 #7
0
def autocorr():
    """Explore autocorrelation of the ES settle price and fit an AR model.

    Fetches "ES" data for 2017 through ``Quandl().get_data``, draws a lag
    plot, an autocorrelation plot and an ACF plot of the 'Settle' column,
    fits an autoregressive model on all but the last 7 observations, and
    compares its out-of-sample predictions with those 7 held-out values
    (printed pairs, mean squared error and a plot).
    """
    import pandas.tools.plotting as ptp
    from statsmodels.graphics.tsaplots import plot_acf
    from statsmodels.tsa.ar_model import AR

    qdl = Quandl()
    start, end = "2017-01-01", "2018-01-01"
    es = qdl.get_data("ES", start=start, end=end)
    print(es.head())

    xs = es['Settle']
    print(type(xs.index))

    ptp.lag_plot(xs)
    ptp.autocorrelation_plot(xs)
    plot_acf(xs, lags=7)

    # Work on the raw values: the model is trained on a slice of the series,
    # so passing the full index as ``dates`` did not match the data length.
    values = xs.values
    train, test = values[1:len(values) - 7], values[len(values) - 7:]

    model = AR(train)
    ar_fit = model.fit()

    print('Lag: %s' % ar_fit.k_ar)
    print('Coefficients: %s' % ar_fit.params)

    # start/end are observation positions, not prices.  Passing price values
    # caused the 'unknown string format' error.  Predict exactly the
    # held-out window that follows the training data.
    ar_predicts = ar_fit.predict(start=len(train),
                                 end=len(train) + len(test) - 1,
                                 dynamic=False)

    for predicted, expected in zip(ar_predicts, test):
        print('predicted: %f vs. expected: %f' % (predicted, expected))

    print(len(test), len(ar_predicts))

    error = mean_squared_error(test, ar_predicts)
    print('Test MSE: %.3f' % error)

    plt.plot(test)
    # Was plt.show(ar_predicts, color='red'), which never drew the predictions.
    plt.plot(ar_predicts, color='red')
    plt.show()
コード例 #8
0
ファイル: _5_readHDF.py プロジェクト: Sandy4321/analysis
def plotLag(pth, bucketName):
    """Show lag plots of the best ask/bid price and volume in a 2x2 grid.

    The frame is read from the HDF file ``pth + bucketName`` under the key
    'capitalKDF'; each panel plots one column addressed by a 3-part key.
    """
    df = pd.read_hdf(pth + bucketName, 'capitalKDF')
    panels = (
        (('A', 'p', '1'), "Lag plot for best ask price"),
        (('A', 'v', '1'), "Lag plot for best ask volume"),
        (('B', 'p', '1'), "Lag plot for best bid price"),
        (('B', 'v', '1'), "Lag plot for best bid volume"),
    )
    for position, (column, heading) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        lag_plot(df[column])
        plt.title(heading)
    plt.show()
コード例 #9
0
ファイル: _5_readHDF.py プロジェクト: capitalk/analysis
def plotLag(pth, bucketName):
    """Show lag plots of the best ask/bid price and volume in a 2x2 grid.

    The frame is read from the HDF file ``pth + bucketName`` under the key
    'capitalKDF'; each panel plots one column addressed by a 3-part key.
    """
    df = pd.read_hdf(pth + bucketName, 'capitalKDF')
    panels = (
        (('A', 'p', '1'), "Lag plot for best ask price"),
        (('A', 'v', '1'), "Lag plot for best ask volume"),
        (('B', 'p', '1'), "Lag plot for best bid price"),
        (('B', 'v', '1'), "Lag plot for best bid volume"),
    )
    for position, (column, heading) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        lag_plot(df[column])
        plt.title(heading)
    plt.show()
コード例 #10
0
 def createPlot(self,data, cols, plotType, msg, pdf):
     """Plot ``data[cols]`` as ``plotType`` and append the figure to ``pdf``.

     Recognised plot types are 'hist', 'pie', 'kde', 'lag',
     'autocorrelation' and 'plots'; any other value is forwarded as the
     ``kind`` of a plot of the value counts.  ``msg`` becomes the title and,
     for the plot types listed in ``labelled``, ``cols`` becomes the y label.
     """
     # Start a fresh figure for the plot that follows.
     plt.figure()
     selected = data[cols]
     if plotType == 'hist':
         drawn = selected.hist()
     elif plotType == 'pie':
         drawn = selected.value_counts().plot.pie()
     elif plotType == 'kde':
         drawn = selected.plot.kde()
     elif plotType == 'lag':
         from pandas.tools.plotting import lag_plot
         drawn = lag_plot(selected)
     elif plotType == 'autocorrelation':
         from pandas.tools.plotting import autocorrelation_plot
         drawn = autocorrelation_plot(selected)
     elif plotType == 'plots':
         drawn = selected.plot(x_compat=True)
     else:
         drawn = selected.value_counts().plot(kind = plotType)
     labelled = ('bar', 'hist', 'kde', 'lag', 'autocorrelation', 'plots')
     if plotType in labelled:
         drawn.set_ylabel(cols)
     drawn.set_title(msg)
     pdf.savefig(drawn.get_figure())
コード例 #11
0
ファイル: Pricing.py プロジェクト: davideflo/Python_code
# Collect the PUN values of the three source frames.
pun = [
    data1['PUN'].values.ravel(),
    data2['PUN [€/MWH]'].values.ravel(),
    data3['PUN [€/MWH]'].dropna().values.ravel(),
]

# Flatten the per-frame arrays into one list of values.
unlisted =  [item for sublist in pun for item in sublist]

df = pd.DataFrame(unlisted)
# Index the values by an hourly range starting 2014-01-01, cut to the data length.
df = df.set_index(pd.date_range('2014-01-01', '2016-12-14', freq = 'H')[:df.shape[0]])

df.plot()
df.resample('D').mean().plot()
df.resample('M').mean().plot()

plt.figure()
plotting.lag_plot(df.resample('M').mean())

plt.figure()
plotting.autocorrelation_plot(df)
plt.figure()
plotting.autocorrelation_plot(df.resample('D').mean())

# Autocorrelation of the daily means, one figure per year.
# .loc replaces the removed .ix indexer for boolean-mask selection.
for year in (2014, 2015, 2016):
    plt.figure()
    plotting.autocorrelation_plot(df.loc[df.index.year == year].resample('D').mean())

plt.figure()
plotting.lag_plot(df.loc[df.index.year == 2014])
コード例 #12
0
def gen_cluster_plots(cluster_directory_root, depth):
    # load data
    gc, mt, track = load_data(None, 0)
    data = pd.concat([gc.data, mt.data])

    labels = data.index.values
    pos_labels = labels + '+'
    neg_labels = labels + '-'
    pos_data = pd.DataFrame(data=data.as_matrix(), index=pos_labels,
                            columns=data.columns.values)
    neg_data = pd.DataFrame(data=data.as_matrix(), index=neg_labels,
                            columns=data.columns.values)

    data = pd.concat([data, pos_data, neg_data])

    generic_dir = cluster_directory_root.split('/') + (['*'] * depth)
    generic_dir = ('/').join(generic_dir)
    cluster_directories = \
        glob.glob(generic_dir)

    clusterings = {}
    clusterings_models = {}
    for cluster_dir in cluster_directories:
        try:
            clustering_id = cluster_dir.split('/')[-1:][0]
            # read final clusters
            clusters = {}
            filepath = '/'.join(cluster_dir.split('/') + ['assignments.txt'])
            lines = (open(filepath, 'r').read().splitlines())
            l = 0
            while l < len(lines):
                cluster_name = lines[l]
                cluster_members = lines[l + 1].split('\t')
                clusters[cluster_name] = cluster_members
                l += 4

            clusterings[clustering_id] = clusters

            # load models
            models = {}
            model_files = glob.glob(cluster_dir + '/*')
            for model_file in model_files:
                try:
                    model_id = model_file.split('/')[-1:][0]
                    json = open(model_file).read()
                    models[model_id] = HiddenMarkovModel.from_json(json)
                    print 'model loaded from: ', model_file
                except:
                    pass
            clusterings_models[clustering_id] = models
        except:
            pass

    background = set()
    for clustering in clusterings.itervalues():
        for cid, members in clustering.iteritems():
            background.update(set(members))

    background = list(background)
    # data = data.loc[background, :]

    # generate random clusterings of the same size k as our models
    for clustering_id, clustering in clusterings.iteritems():
        for model_id, members in clustering.iteritems():
            sequences = data.loc[members, :]
            pltdir = '/'.join(cluster_directory_root.split('/') + ['plots'])

            # make line plots directory
            if not os.path.isdir(pltdir + '/line'):
                print "Creating directory...", pltdir
                os.mkdir(pltdir + '/line')

            savename = pltdir + '/line/' + model_id + '_lineplot'

            plt_title = model_id + ' Line Plot'
            ax = sequences.T.plot(legend=False, rot=2)
            ax.set_title(plt_title)
            ax.set_xlabel('Timepoint')
            ax.set_ylabel('Normalized Expression')

            print 'Saving: ', savename
            fig = ax.get_figure()
            fig.savefig(savename)
            fig.clear()

            # make autocorr plots directory
            if not os.path.isdir(pltdir + '/autocorr'):
                print "Creating directory...", pltdir
                os.mkdir(pltdir + '/autocorr')

            savename = pltdir + '/autocorr/' + model_id + '_autocorr'

            plt_title = model_id + ' Autocorr Plot'
            for seq in sequences.index:
                ax = autocorrelation_plot(sequences.loc[seq])
            ax.set_title(plt_title)

            print 'Saving: ', savename
            fig = ax.get_figure()
            fig.savefig(savename)
            fig.clear()

            # make lag plots directory
            if not os.path.isdir(pltdir + '/lag'):
                print "Creating directory...", pltdir
                os.mkdir(pltdir + '/lag')

            from pylab import *
            NUM_COLORS = len(members)
            cm = get_cmap('gist_rainbow')
            colors = []
            for i in range(NUM_COLORS):
                colors.append(cm(1.*i/NUM_COLORS))

            savename = pltdir + '/lag/' + model_id + '_lagplot'

            plt_title = model_id + ' Lag Plot'
            for i, seq in enumerate(sequences.index):
                ax = lag_plot(sequences.loc[seq], c=colors[i])
            ax.set_title(plt_title)

            print 'Saving: ', savename
            fig = ax.get_figure()
            fig.savefig(savename)
            fig.clear()

            """
コード例 #13
0
ファイル: FS.py プロジェクト: davideflo/Python_code
pun_in = pun_g.ix[np.where((pun_g - np.mean(pun_g))/np.std(pun_g) <= 3)]
bil_in6 = bil6.ix[np.where((pun_g - np.mean(pun_g))/np.std(pun_g) <= 3)]

stats.linregress(bil_in6, pun_in)

#########################
diff_pf = fsm['pun'] - fsm['francia']

plt.figure()
plt.plot(diff_pf)
plt.plot(pun)

from pandas.tools import plotting

plt.figure()
plotting.lag_plot(fsm['pun'])
plt.figure()
plotting.lag_plot(diff_pf)

diff_pf.corr(pun)

plt.figure()
plt.scatter(np.array(diff_pf), np.array(pun[1:281]))

##################################

plt.figure()
plt.scatter(np.array(fsm['francia']), np.array(fsm['pun']))


fplm = linear_model.LinearRegression(fit_intercept = True).fit(np.array(fsm['francia']).reshape(-1,1),np.array(fsm['pun']))
コード例 #14
0
dec = statsmodels.api.tsa.seasonal_decompose(tsbtc, freq=52)
dec.plot()

plt.figure()
tsbtc.plot()
plt.axhline(y=200)
plt.axhline(y=500)
plt.figure()
tsc.plot()

tsbtc.corr(tsc)

plt.figure()
plotting.lag_plot(
    tsbtc
)  ### surprising!!! I think the reticular squared structure puts in evidence the
### particular pattern that I've noticed.
### N.B.: dates from 2013

data_bit = []
for i in range(tsbtc.size - 1):
    xy = np.array([tsbtc.ix[i], tsbtc.ix[i + 1]])
    data_bit.append(xy)

dataset = np.array(data_bit)

H, xedges, yedges = np.histogram2d(dataset[:, 0],
                                   dataset[:, 1],
                                   bins=20,
                                   normed=True)
コード例 #15
0
import random as rnd
import pandas
try:
    from pandas.plotting import lag_plot
except ImportError:
    # Older pandas releases only provide the function here; newer ones
    # removed this module.
    from pandas.tools.plotting import lag_plot
import matplotlib.pyplot as plt

# Lag plot of 10000 random numbers.
s = pandas.Series([rnd.random() for _ in range(10000)])
plt.figure()
lag_plot(s, marker='o', color='grey')
plt.xlabel('Random Number - s[i]')
plt.ylabel('Lag1(Random Number) - s[i+1]')
plt.show()
コード例 #16
0
ファイル: lag_plot.py プロジェクト: clover9gu/pydata-learn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.tools.plotting import lag_plot


# Per-year means of both CSV files.
df = pd.read_csv('transcount.csv').groupby('year').aggregate(np.mean)
gpu = pd.read_csv('gpu_transcount.csv').groupby('year').aggregate(np.mean)

# Outer-join the two tables on the year index and replace missing values with 0.
merged = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
df = merged.replace(np.nan, 0)

# Lag plot of the log of the 'trans_count' column.
log_counts = np.log(df['trans_count'])
lag_plot(log_counts)
plt.show()
コード例 #17
0
series = Middle[Topics[0]]
groups = series
years = pd.DataFrame()
for name, group in groups:
    years[name.year] = group.values
years.boxplot()

# In[493]:

type(groups)

# In[455]:

from pandas.tools.plotting import lag_plot

lag_plot(Middle[Topics[7]], color='blue')

# In[425]:

plt.figure(figsize=(14, 8))
m = 0
for i in Topics:
    plt.plot(Middle[i].rolling(window=2).mean(),
             lw=5,
             color=color[m],
             marker=markers[m],
             ms=15)
    m += 1
plt.xticks(year, rotation='vertical', fontsize=20)
plt.yticks(fontsize=20)
plt.xlabel('Year', fontsize=20)
コード例 #18
0
ファイル: CSS.py プロジェクト: davideflo/Python_code
plt.plot(np.array(Diff/pun))

spark_ts = pd.Series(spark['spread'].resample('D').mean(), dtype = 'float64')
pun_ts = pd.Series(pun, dtype = 'float64')

spark_ts.corr(pun_ts)

DS = spark['spread'].resample('D').mean()/pun

plt.figure()
plt.plot(DS)

DS.corr(pun_ts)

plt.figure()
plotting.lag_plot(DS)


plt.figure()
plt.plot(statsmodels.api.tsa.acf(np.array(DS)))
plt.plot(DS.resample('M').mean())
plt.figure()
plt.plot(statsmodels.api.tsa.periodogram(np.array(DS)))

###############################################################################
def fourierExtrapolation(x, n_predict, n_harmonics = 0):
    x = np.array(x)
    n = x.size
    if n_harmonics == 0:
        n_harm = 100                     # number of harmonics in model
    else:
コード例 #19
0
# explore model a little further
print("R-squared:")
# The score was computed but never shown; print it after the heading.
print(regr.score(yearsTest, sunspTest))
# result of -0.12 which means a flat line is a better estimator than this model

# reshape time series. index is years between 1700 and 2008
dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
del dta['YEAR']
# figsize is a keyword argument of plot(), not a function to call.
dta.plot(figsize=(12, 3))

import pandas.tools.plotting as pdplot

# first do a lag plot which will show relationship with value this period and value in last period
plt.figure(figsize=(12, 12))
pdplot.lag_plot(dta)
plt.title("Sunspots this year vs. last year\n")

# second do lag plots for 1-4 periods
plt.figure(figsize=(12, 12))

Lags = [1, 2, 3, 4]

plt.subplot(221)
pdplot.lag_plot(dta, lag=Lags[0])
plt.title("Lag = " + str(Lags[0]))

plt.subplot(222)
pdplot.lag_plot(dta, lag=Lags[1])
plt.title("Lag = " + str(Lags[1]))
コード例 #20
0
        
    Nine = pd.DataFrame(nine)

    return Nine
##########################################################################

#for i in range(19,22,1):
#    D = Extract_Hour(i)
#    D.plot()
#    plt.figure()
#    lag_plot(D)


D = Extract_Hour(21)
D.plot()
lag_plot(D)

x21 = data3["PUN"].ix[data3[data3.columns[1]] == 21]
for x,i in enumerate(x21):
    print(x)
    print(i)
    if(i == max(x21)):
        break 
###################################################################################
###################################################################################
### hourwise patterns ###

names = ['data','data2','data3','data4','data5','data6','data7']        
d = {}
d2 = {}
コード例 #21
0
pun.append(data2['PUN [€/MWH]'].values.ravel())
pun.append(data3['PUN [€/MWH]'].dropna().values.ravel())
pun.append(data4['PUN [€/MWH]'].dropna().values.ravel())


# Flatten the per-frame arrays into one list of values.
unlisted =  [item for sublist in pun for item in sublist]

df = pd.DataFrame(unlisted) ######### to: 2 DAYS AHEAD OF LAST PUN
# Index the values by an hourly range starting 2014-01-01, cut to the data length.
df = df.set_index(pd.date_range('2014-01-01', '2018-01-02', freq = 'H')[:df.shape[0]])

df.plot()
df.resample('D').mean().plot()
df.resample('M').mean().plot()

plt.figure()
plotting.lag_plot(df.resample('M').mean())

plt.figure()
plotting.autocorrelation_plot(df)
plt.figure()
plotting.autocorrelation_plot(df.resample('D').mean())

# Autocorrelation of the daily means, one figure per year.
# .loc replaces the removed .ix indexer for boolean-mask selection.
for year in (2014, 2015, 2016):
    plt.figure()
    plotting.autocorrelation_plot(df.loc[df.index.year == year].resample('D').mean())

plt.figure()
plotting.lag_plot(df.loc[df.index.year == 2014])
コード例 #22
0
ファイル: script40.py プロジェクト: darkblue-b/kaggleScape
               aspect=2.5,
               palette='BuGn_r')

# Oh man. This doesn't bode well as most of the crimes were not resolved.  This means that there are still quite a lot of outstanding crime cases pending.

# Anyway, we have now reached the last two columns of the dataset, X and Y. These columns are coordinates, coordinates which relate to the "address" column. Just for the purposes of this notebook, I will be plotting these coordinates via lag-plots just to visually investigate if the data is random or not.
#
# Refer to the Pandas visualisation webpage for a more detailed explanation: http://pandas.pydata.org/pandas-docs/version/0.18.1/visualization.html

# In[ ]:

# Importing the lag_plot plotting function
from pandas.tools.plotting import lag_plot
# Lag_plot for X coordinate
plt.figure()
lag_plot(crime.X)

# In[ ]:

lag_plot(crime.Y, c='goldenrod')

# And finally let us look at the autocorrelation plot to look at the X and Y data just to check for randomness in the data over time. If the data is non-random, then one or more of the autocorrelations will be significantly non-zero, taking into account the confidence bands ( dashed and solid lines)

# In[ ]:

from pandas.tools.plotting import autocorrelation_plot
autocorrelation_plot(crime.X, color='k', marker='.', linewidth='0.25')
autocorrelation_plot(crime.Y, color='goldenrod', marker='.', linewidth='0.15')
plt.ylim(-0.15, 0.15)

# Seems pretty random for this time-series data. Anyway this notebook is a work in progress.
コード例 #23
0
def lagPlot(ySeries, plotName="plot"):
    """Show a lag plot of ``ySeries`` in a new figure titled ``plotName``.

    The values are wrapped in a ``pandas.Series`` before plotting.
    """
    plt.figure()
    plt.title(plotName)
    lag_plot(pandas.Series(ySeries), marker='2', color='green')
    plt.show()
コード例 #24
0
data = data.set_index(data['Date'])
pun = data['PUN [€/MWH]'].dropna().resample('D').mean()
nord = data['MGP NORD [€/MWh]'].dropna().resample('D').mean()

plt.figure()
plt.scatter(np.array(DF[DF.columns[0]]), np.array(pun))
plt.figure()
plt.scatter(np.array(DF[DF.columns[1]]), np.array(pun), color='black')

np.corrcoef(np.array(DF[DF.columns[0]]), np.array(pun))
np.corrcoef(np.array(DF[DF.columns[1]]), np.array(pun))

from pandas.tools import plotting

plt.figure()
plotting.lag_plot(DF['nord-fran'])

perc = []
signed_perc = []
for i in range(DF['nord-fran'].size - 1):
    perc.append(
        np.abs((DF['nord-fran'].ix[i + 1] - DF['nord-fran'].ix[i]) /
               DF['nord-fran'].ix[i]))
    signed_perc.append((DF['nord-fran'].ix[i + 1] - DF['nord-fran'].ix[i]) /
                       DF['nord-fran'].ix[i])

plt.figure()
plt.plot(np.array(signed_perc))

#################### NORD ######################
plt.figure()
コード例 #25
0
####################
# check auto correlation
from matplotlib import pyplot
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
#series = Series.from_csv('daily-minimum-temperatures.csv', header=0)
plot_acf(values, lags=50)
pyplot.show()

####################
# check correlation
from matplotlib import pyplot
from pandas.tools.plotting import lag_plot

lag_plot(market_data_droppped['Close'])
pyplot.show()

####################

# determine best lag
from pandas import Series
from matplotlib import pyplot
from statsmodels.tsa.ar_model import AR
from sklearn.metrics import mean_squared_error

# split dataset
X = values.values
train, test = X[1:len(X) - 5], X[len(X) - 5:]
# train autoregression
model = AR(train)
コード例 #26
0
pun = {"PUN": pun1}
rng2 = pd.date_range(start="01-01-2010", periods = pun1.size,freq = 'H')
ixx = np.arange(56951)
rng = pd.DataFrame([rng2,ixx])
rng.columns = [["rng", "ixx"]]
rng.columns = ["rng"]
dd = {"ixx": ixx, "pun": pun1}
ap = pd.DataFrame(dd).set_index(rng2)

ap2 = pd.DataFrame(pun1)
ap2.columns = ["pun"]

months = np.unique(rng.month)

jan = ap2.ix[ap['ixx'].ix[ap.index.month == 1].tolist()]
lag_plot(jan)

for i in range(1,13,1):
    print i
    plt.figure()
    lag_plot(ap2.ix[ap['ixx'].ix[ap.index.month == i].tolist()])

##### example of kernel density estimation ####
kde_jan = KernelDensity(kernel='gaussian', bandwidth = 4).fit(np.array(jan['pun']).reshape(-1,1))

xplot = np.linspace(0,180,100)
yplot = np.exp(kde_jan.score_samples(xplot.reshape(-1,1)))

plt.figure()
plt.plot(yplot)
#########################################################
コード例 #27
0
from math import log

# Convert the answers explicitly: on Python 3 input() returns a string, and
# the division below would fail on it.
time_interval = int(input('Give me a time_interval (10,15,20,30)? '))
print('You said: ' + str(time_interval))

forecasting_horizon = float(input('Give me the forecasting horizon in hours? '))
print('You said: ' + str(forecasting_horizon))

# Number of time_interval-minute steps in the requested horizon.
steps_ahead = (60.0 / time_interval) * forecasting_horizon
print('You want: ' + str(steps_ahead))

#Quick Check for Autocorrelation
series = Series.from_csv(
    '/home/aris/Desktop/Short-Term-Electric-Load-Forecasting-CSL-master/price.csv',
    header=0)
lag_plot(series, lag=300)
pyplot.show()

############################################
plot_acf(series, lags=300)
pyplot.show()
##############################################
series.hist()
pyplot.show()
###############################################
X = series.values
pyplot.plot(X)
pyplot.show()
# Augmented Dickey-Fuller test on the raw values.
result = adfuller(X)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
コード例 #28
0
ファイル: 随笔代码.py プロジェクト: jipaipaipai/MyFiles
df = df.replace(np.nan, 0)
df.plot(logy=True)
df[df['trans_count'] > 0].plot(kind='scatter',
                               x='trans_count',
                               y='gpu_trans_count_x',
                               loglog=True)
plt.show()

from pandas.tools.plotting import lag_plot
df = pd.read_csv('gpu.csv')
df = df.groupby('year').aggregate(np.mean)
gpu = pd.read_csv('gpu_transcount.csv')
gpu = gpu.groupby('year').aggregate(np.mean)
df = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
df = df.replace(np.nan, 0)
lag_plot(np.log(df['trans_count']))
plt.show()

from pandas.tools.plotting import autocorrelation_plot
df = pd.read_csv('gpu.csv')
df = df.groupby('year').aggregate(np.mean)
gpu = pd.read_csv('gpu_transcount.csv')
gpu = gpu.groupby('year').aggregate(np.mean)
df = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
df = df.replace(np.nan, 0)
# Draw the autocorrelation plot of the log of 'trans_count'.
autocorrelation_plot(np.log(df['trans_count']))
# The show() call had been swallowed into the trailing comment and never ran.
plt.show()

# Cross-validation
import pandas as pd
import numpy as np
df = pd.read_csv('wdbc.csv', header=None)
コード例 #29
0
from pandas.tools.plotting import lag_plot
from pandas.tools.plotting import autocorrelation_plot
from scipy.stats.stats import pearsonr
from math import sqrt
from sklearn.metrics import mean_squared_error

# Exploring the data
# Descriptive stats
isig.describe()
isig.plot()
pyplot.show()

# Skewed toward the higher side
isig.hist()
pyplot.show()
# Significantly autocorrelated as expected.
autocorrelation_plot(isig)
pyplot.show()

# Check for autocorrelation. As expected, isig is directly related to previous values. High autocorrelation.
lag_plot(isig)
pyplot.show()

# Check for seasonality
# seasonal = seasonal_decompose(series['isig'], model='additive', frequency=10)
# seasonal.plot()
# pyplot.show()
# seasonal = seasonal_decompose(series['glucose'], model='additive', frequency=10)
# seasonal.plot()
# pyplot.show()
コード例 #30
0
pun = {"PUN": pun1}
rng2 = pd.date_range(start="01-01-2010", periods=pun1.size, freq='H')
ixx = np.arange(56951)
rng = pd.DataFrame([rng2, ixx])
rng.columns = [["rng", "ixx"]]
rng.columns = ["rng"]
dd = {"ixx": ixx, "pun": pun1}
ap = pd.DataFrame(dd).set_index(rng2)

ap2 = pd.DataFrame(pun1)
ap2.columns = ["pun"]

months = np.unique(rng.month)

jan = ap2.ix[ap['ixx'].ix[ap.index.month == 1].tolist()]
lag_plot(jan)

for i in range(1, 13, 1):
    print i
    plt.figure()
    lag_plot(ap2.ix[ap['ixx'].ix[ap.index.month == i].tolist()])

##### example of kernel density estimation ####
kde_jan = KernelDensity(kernel='gaussian',
                        bandwidth=4).fit(np.array(jan['pun']).reshape(-1, 1))

xplot = np.linspace(0, 180, 100)
yplot = np.exp(kde_jan.score_samples(xplot.reshape(-1, 1)))

plt.figure()
plt.plot(yplot)
コード例 #31
0
    sns.set_style('ticks')
    with sns.color_palette("Reds_r"):
        # plot densities of log-transformed data
        plt.figure(figsize=(8, 4))
        for col in data.columns:
            sns.kdeplot(rets[col], shade=True)
        plt.legend(loc=2)
    print 'Log-transformed feature distributions'

    #==============================================================================
    #     Lag_plot

    from pandas.tools.plotting import lag_plot
    for s in list(data.columns):
        plt.figure()
        lag_plot(data.ix[:, s])
#==============================================================================
# Autocorrelation
    from pandas.tools.plotting import autocorrelation_plot
    for s in list(data.columns):
        plt.figure()
        autocorrelation_plot(data.ix[:, s])

#==============================================================================
#http://stackoverflow.com/questions/22179119/normality-test-of-a-distribution-in-python
#    array = np.random.randn(10000)

    from matplotlib import pyplot as plt
    import matplotlib.mlab as mlab
    for s in list(data.columns):
        plt.figure()
コード例 #32
0
plt.plot(np.array(Diff / pun))

spark_ts = pd.Series(spark['spread'].resample('D').mean(), dtype='float64')
pun_ts = pd.Series(pun, dtype='float64')

spark_ts.corr(pun_ts)

DS = spark['spread'].resample('D').mean() / pun

plt.figure()
plt.plot(DS)

DS.corr(pun_ts)

plt.figure()
plotting.lag_plot(DS)

plt.figure()
plt.plot(statsmodels.api.tsa.acf(np.array(DS)))
plt.plot(DS.resample('M').mean())
plt.figure()
plt.plot(statsmodels.api.tsa.periodogram(np.array(DS)))


###############################################################################
def fourierExtrapolation(x, n_predict, n_harmonics=0):
    x = np.array(x)
    n = x.size
    if n_harmonics == 0:
        n_harm = 100  # number of harmonics in model
    else:
コード例 #33
0
# Load dataset
data_set = data_preparation.read_data('./data_set/HourlyDemands_2002-2016.csv')
data, label = data_preparation.split_label(data_set, 'Ontario Demand')
print('Data set Loaded!')
print(data.shape)
print(label.shape)

# Plot 2 weeks of data points
line = np.linspace(0, 336, 336)
plt.plot(line, label[0:336])
plt.xlabel('Hour')
plt.ylabel('Power Demand')
plt.title('Power Demand of first 14 days')
plt.show()
lag_plot(label)

# Plotting the lag plot of target feature
plt.title('Lag plot of Power Demand')
plt.xlabel('P(t)')
plt.ylabel('P(t+1)')
plt.show()

# Plotting auto-correlation
autocorrelation_plot(label[0:1000])
plt.show()

# Splitting train and test data
train_data, test_data = data[0:119832], data[119832:]
train_label, test_label = label[0:119832], label[119832:]
コード例 #34
0
ファイル: BTC_server.py プロジェクト: davideflo/Python_code
tsc = pd.Series(change.ix[list(cd)])

dec = statsmodels.api.tsa.seasonal_decompose(tsbtc, freq = 52)
dec.plot()

plt.figure()
tsbtc.plot()
plt.axhline(y = 200)
plt.axhline(y = 500)
plt.figure()
tsc.plot()

tsbtc.corr(tsc)

plt.figure()
plotting.lag_plot(tsbtc) ### surprising!!! I think the reticular squared structure puts in evidence the 
                         ### particular pattern that I've noticed.
                         ### N.B.: dates from 2013

data_bit = []
for i in range(tsbtc.size-1):
    xy = np.array([tsbtc.ix[i],tsbtc.ix[i+1]])
    data_bit.append(xy)

dataset = np.array(data_bit)

H, xedges, yedges = np.histogram2d(dataset[:,0], dataset[:,1], bins = 20, normed = True)

plt.figure()
im = plt.imshow(H, interpolation='nearest', origin='low',extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
コード例 #35
0
ファイル: time_series.py プロジェクト: pkings/ewiis3_python
 def create_lag_plot(self, filename):
     """Save a lag plot of ``self.raw_time_series`` as a PNG file.

     The file is written to ``self.figure_output_dir`` followed by
     ``filename`` and the suffix '_lag_plot.png'.
     """
     # New figure and axes for the plot that follows.
     plt.subplots()
     lag_plot(self.raw_time_series)
     target = '{}{}_lag_plot.png'.format(self.figure_output_dir, filename)
     plt.savefig(target)
コード例 #36
0
data = data.set_index(data["Date"])
pun = data["PUN [€/MWH]"].dropna().resample("D").mean()
nord = data["MGP NORD [€/MWh]"].dropna().resample("D").mean()

plt.figure()
plt.scatter(np.array(DF[DF.columns[0]]), np.array(pun))
plt.figure()
plt.scatter(np.array(DF[DF.columns[1]]), np.array(pun), color="black")

np.corrcoef(np.array(DF[DF.columns[0]]), np.array(pun))
np.corrcoef(np.array(DF[DF.columns[1]]), np.array(pun))

from pandas.tools import plotting

plt.figure()
plotting.lag_plot(DF["nord-fran"])


perc = []
signed_perc = []
for i in range(DF["nord-fran"].size - 1):
    perc.append(np.abs((DF["nord-fran"].ix[i + 1] - DF["nord-fran"].ix[i]) / DF["nord-fran"].ix[i]))
    signed_perc.append((DF["nord-fran"].ix[i + 1] - DF["nord-fran"].ix[i]) / DF["nord-fran"].ix[i])

plt.figure()
plt.plot(np.array(signed_perc))

#################### NORD ######################
plt.figure()
plt.scatter(np.array(DF[DF.columns[0]]), np.array(nord))
plt.figure()
コード例 #37
0
df2 = df2.dropna()               ###drop rows with null values
df2.head()                 
df2.count()                    ###count no. of not null value in each column

df2.corr()                     ###Check corelation with each column
                               ###change in inventory is poorly co-related remove that from model

df2['Gross domestic product'].plot()                 ##timeseries plot                    
plt.show()
df2['Gross domestic product'].hist()                 ##histogram plot
plt.show()                                           
df2['Gross domestic product'].plot(kind = 'kde')     ##density plot
plt.show()                                           ##data is skewed  left side

lag_plot(df2['Gross domestic product'])              ##positive correlation relationship

autocorrelation_plot(df2['Gross domestic product'])   ##line above dotted line shows statistically significant
                                                      ##we can see trend in data and not seasonality
                                                      
##############################################################
#Multivariate regression
##############################################################

###Train- test split
# Target is the GDP column; every other column of df2 is a predictor.
target_column = 'Gross domestic product'
y = df2[target_column]
x = df2.drop([target_column], axis=1)

# Chronological split: the first 70% of the rows train, the rest test.
n_rows = len(x)
train_size = int(n_rows * 0.70)

train_x = x[0:train_size]
test_x = x[train_size:n_rows]
train_y = y[0:train_size]
test_y = y[train_size:n_rows]
コード例 #38
0
plt.figure()
plotting.autocorrelation_plot(np.array(rsigma[1:]), color='palevioletred')

# Running share of non-positive values: ratio[k] is the fraction of the first
# k + 1 entries of rsigma that are <= 0, for every prefix except the full
# series. A cumulative sum gives all prefix counts in one pass; the original
# re-scanned the whole prefix on every iteration.
rsigma_values = np.asarray(rsigma)
nonpositive_so_far = np.cumsum(rsigma_values <= 0)
prefix_lengths = np.arange(1, rsigma_values.size, dtype=float)
ratio = list(nonpositive_so_far[:-1] / prefix_lengths)

plt.figure()
plt.plot(np.array(ratio))
#### Try fitting an Ornstein–Uhlenbeck process

cal_values = ger2015['CAL'].values.ravel()

# Visual check of serial dependence in the CAL series.
plt.figure()
plotting.autocorrelation_plot(cal_values)
plt.figure()
plotting.lag_plot(ger2015['CAL'])

# Regress each observation on the one before it: X is the series without its
# last point, y the series without its first.
X = cal_values[:-1]
y = cal_values[1:]

lm = LinearRegression(fit_intercept=True)
lm.fit(X.reshape(-1, 1), y)

# Slope and intercept of the lag-1 regression.
a, b = lm.coef_[0], lm.intercept_

# Sums and cross-products of the lagged pairs.
Sx = np.sum(X)
Sy = np.sum(y)
Sxx = np.sum(X**2)
Sxy = np.sum(X * y)
コード例 #39
0
        
    Nine = pd.DataFrame(nine)

    return Nine
##########################################################################

#for i in range(19,22,1):
#    D = Extract_Hour(i)
#    D.plot()
#    plt.figure()
#    lag_plot(D)


D = Extract_Hour(21)
D.plot()
# lag_plot draws on the current axes when no ``ax`` is given, so open a new
# figure first; otherwise the scatter lands on top of the line plot above.
plt.figure()
lag_plot(D)

# PUN values of the rows whose second column equals 21. A boolean mask works
# the same through ``.loc`` as it did through the removed ``.ix`` indexer.
x21 = data3["PUN"].loc[data3[data3.columns[1]] == 21]

# Print position and value of every entry up to and including the first one
# that equals the maximum. The maximum is computed once instead of on every
# iteration; the guard keeps an empty selection from raising, as before.
x21_max = max(x21) if len(x21) else None
# NOTE: ``x`` is the position and ``i`` the value; the names are kept because
# both remain defined at module level after the loop.
for x, i in enumerate(x21):
    print(x)
    print(i)
    if i == x21_max:
        break
###################################################################################
###################################################################################
### hourwise patterns ###

# Names of the seven data sets handled by the hour-wise analysis, and two
# initially empty dictionaries that are filled in later.
names = [
    'data',
    'data2',
    'data3',
    'data4',
    'data5',
    'data6',
    'data7',
]
d = dict()
d2 = dict()
コード例 #40
0
                                  0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
                                  0.95, 0.975, 0.98, 0.99
                              ])
# Share of MAE entries at or above the 99% quantile. ``float`` keeps this a
# true division even where ``/`` on two ints floors (Python 2). The result is
# not stored, so it is only visible when evaluated interactively.
np.where(
    MAE >= scipy.stats.mstats.mquantiles(MAE, prob=0.99))[0].size / float(MAE.size)

plt.figure()
plotting.autocorrelation_plot(Err)
plt.figure()
plotting.autocorrelation_plot(Y)
plt.figure()
plotting.autocorrelation_plot(YH, color='green')

# Import the submodule explicitly: importing only the ``statsmodels.graphics``
# package does not guarantee that its ``tsaplots`` attribute has been loaded.
import statsmodels.graphics.tsaplots
statsmodels.graphics.tsaplots.plot_acf(Y, lags=30 * 24)
# lag_plot draws on the current axes when no ``ax`` is given; give it its own
# figure so it does not land on the ACF plot created just above.
plt.figure()
plotting.lag_plot(Y, lag=30 * 24)
plt.figure()
plotting.autocorrelation_plot(YH, color='green')

# Pair every value of column 31 with the value ``lag_steps`` rows later:
# col1[k] is row k and col2[k] is row k + lag_steps. The original built the
# pairs one element at a time through ``.ix``, which has been removed from
# pandas; positional slices via ``.iloc`` produce the same pairs.
# NOTE(review): this assumes the row lookups were meant positionally, which
# the ``j < DTFC.shape[0]`` bound suggests - confirm if DTFC has a
# non-default integer index.
lag_steps = 30 * 24
lag_column = DTFC[DTFC.columns[31]]
pair_count = max(DTFC.shape[0] - lag_steps, 0)

col1 = lag_column.iloc[:pair_count].tolist()
col2 = lag_column.iloc[lag_steps:lag_steps + pair_count].tolist()

# Leave the loop counters with the values the original while-loop ended on.
i = pair_count
j = lag_steps + pair_count

plt.figure()
plt.plot(np.array(col1))
コード例 #41
0
This dataset describes the minimum daily temperatures over
 10 years (1981-1990) in the city Melbourne, Australia.
"""


# Importing data
import pandas as pd
import matplotlib.pyplot as plt

# ``pandas.tools.plotting`` was relocated to ``pandas.plotting`` in pandas 0.20
# and the old path no longer exists in current releases; fall back to it only
# on old installations.
try:
    from pandas.plotting import autocorrelation_plot, lag_plot
except ImportError:
    from pandas.tools.plotting import autocorrelation_plot, lag_plot

# NOTE: absolute, machine-specific path; the script only runs where this file exists.
series = pd.read_csv('/Users/ashutosh/Documents/analytics/Projects/TimeSeries/daily-minimum-temperatures.csv', parse_dates = ['Date'])
print(series.head())
series['temp'].plot()
plt.show()

## Checking for autocorrelation
lag_plot(series['temp'])
plt.show()

## Calculating the Pearson coefficient between neighbouring observations

values = pd.DataFrame(series['temp'].values)
# First column is the series shifted by one step, second column the unshifted
# series (labelled 't+1' here).
dataframe = pd.concat([values.shift(1), values], axis=1)
dataframe.columns = ['t-1', 't+1']
result = dataframe.corr()
print(result)

## Autocorrelation plots

autocorrelation_plot(series['temp'])
plt.show()
コード例 #42
0
# | Hexbin         | Drop NaNs               |   |
# | Pie            | Fill 0’s                |   |
# 
# If any of these defaults are not what you want, or if you want to be explicit about how missing values are handled, consider using fillna() or dropna() before plotting.

# ### density plot

# In[51]:


# Kernel-density estimate of 1000 standard-normal samples.
normal_samples = np.random.randn(1000)
ser = pd.Series(normal_samples)
ser.plot(kind='kde')
plt.show()


# ### lag plot
# Lag plots are used to check if a data set or time series is random. Random data should not exhibit any structure in the lag plot. Non-random structure implies that the underlying data are not random.

# In[52]:


# ``pandas.tools.plotting`` was relocated to ``pandas.plotting`` in pandas 0.20
# and the old path no longer exists in current releases; fall back to it only
# on old installations.
try:
    from pandas.plotting import lag_plot
except ImportError:
    from pandas.tools.plotting import lag_plot

plt.figure()
# A sine wave with a small amount of uniform noise: clearly non-random data,
# so the lag plot should show structure.
data = pd.Series(0.1 * np.random.rand(1000) + 0.9 * np.sin(np.linspace(-99 * np.pi, 99 * np.pi, num=1000)))
lag_plot(data)
plt.show()


# ### matplotlib gallery
# documentation: http://matplotlib.org/gallery.html
コード例 #43
0
import Fourier

# Fourier-based reconstruction of dpun.
# NOTE(review): the meaning of the arguments 0 and 16 is defined by the
# project's Fourier module - confirm there.
reconstructed = Fourier.fourierExtrapolation(dpun, 0, 16)

# Overlay the original series and its reconstruction (in red).
plt.figure()
plt.plot(dpun)
plt.plot(reconstructed, color = 'red')

# Mean and spread of the reconstruction error. The values are not stored, so
# they are only visible when evaluated interactively.
reconstruction_error = dpun - reconstructed
np.mean(reconstruction_error)
np.std(reconstruction_error)

# ``pandas.tools.plotting`` was relocated to ``pandas.plotting`` in pandas 0.20
# and the old path no longer exists in current releases; fall back to it only
# on old installations.
try:
    from pandas import plotting
except ImportError:
    from pandas.tools import plotting

plt.figure()
plotting.lag_plot(pd.DataFrame(dpun))

plt.figure()
plt.plot(statsmodels.api.tsa.acf(dpun))

# Two-column frame of consecutive observations: column 0 holds dpun[k] and
# column 1 holds dpun[k + 1]. Built from two shifted views instead of a
# Python-level loop over every index.
dpun_values = np.asarray(dpun).ravel()
lags = pd.DataFrame(np.column_stack((dpun_values[:-1], dpun_values[1:])))
# Lag-1 correlation; not stored, so only visible when evaluated interactively.
lags.corr()

plt.figure()
plotting.lag_plot(pd.DataFrame(dpun), lag = 7)
plt.figure()
plotting.autocorrelation_plot(pd.DataFrame(dpun))
コード例 #44
0
ファイル: fun_with_quandl.py プロジェクト: nastako/marche
#################################################################

pe_values = yspper10y["Value"].values

# Single-level Haar DWT of the "Value" column.
# NOTE(review): "cpd" looks like the legacy PyWavelets name for constant
# padding - confirm the installed version still accepts it.
yspper10ydwted = pywt.dwt(pe_values, "haar", mode="cpd")

# try different levels
yspper10dwtedcoeffs = pywt.wavedec(pe_values, "haar", level=3)

# try maximum wavelet decomposition (replaces the level-3 result above)
yspper10dwtedcoeffs = pywt.wavedec(pe_values, "haar")


#################################################################
## fun with lag_plots
#################################################################

lag_plot(yspper10y)
lag_title = "Lag plot of 1-year lag of " + yaleds[12].name
plt.title(lag_title, fontsize=12)

################################################################################
##### Is the U.S. stock market overvalued?
################################################################################

################################################################################
### Is the U.S. S&P 500 Price-to-Earnings (PE) ratio too expensive right now?
################################################################################

# EY : 20150910 in pandas, DataFrame.plot() serves as a wrapper around
# matplotlib's plt; see the pandas plotting tutorial.
# .plot() returns the Axes it drew on, so label that Axes directly. Calling
# fig.add_subplot(111) afterwards is fragile: depending on the matplotlib
# version it either returns the existing Axes or stacks a new, empty Axes on
# top of the plot, in which case the label ends up on the wrong one.
ax = yspper10y.plot()
fig = ax.figure
fig.suptitle(yaleds[12].name, fontsize=14, fontweight="bold")
ax.set_ylabel(yaleds[12].colname.split(",")[1] + " ;  P/E ratio")
コード例 #45
0
ファイル: PandasPloting.py プロジェクト: gwli/StudyNote
#data = read_csv('data/iris.data')

# <markdowncell>

# ## Lag Plot

# <markdowncell>

# Check whether the data is random.

# <codecell>

# ``pandas.tools.plotting`` was relocated to ``pandas.plotting`` in pandas 0.20
# and the old path no longer exists in current releases; fall back to it only
# on old installations.
try:
    from pandas.plotting import lag_plot
except ImportError:
    from pandas.tools.plotting import lag_plot

plt.figure()
# Sine wave plus a little uniform noise. The noise needs one sample per point:
# the original ``uniform(1000)`` passes 1000 as the lower bound, which yields a
# single scalar with numpy's uniform (and is an error with the stdlib one),
# so every point got the same offset instead of independent noise.
data = pd.Series(0.1*np.random.uniform(size=1000)+0.9*np.sin(np.linspace(-99*np.pi,99*np.pi,num=1000)))
lag_plot(data)

# <rawcell>

# Test with uniform data.

# <codecell>

import numpy as np 
from pandas import DataFrame
import matplotlib.pyplot as plt 

# 5 x 4 frame of absolute values of standard-normal draws, with string row
# labels and single-letter column labels.
Index = ['aaa', 'bbb', 'ccc', 'ddd', 'eee']
Cols = ['A', 'B', 'C', 'D']
magnitudes = np.absolute(np.random.randn(5, 4))
df = DataFrame(magnitudes, index=Index, columns=Cols)
コード例 #46
0
ファイル: teststat.py プロジェクト: thangbk2209/Ar-wc98
# from pandas import Series
# from pandas import DataFrame
# from pandas import concat
# from matplotlib import pyplot
# series = Series.from_csv('daily-minimum-temperatures-in-me.csv', header=0)
# print series.head()
# # dataframe = concat([values.shift(1), values], axis=1)
# series.plot()
# pyplot.show()

from pandas import Series
from pandas import read_csv
from matplotlib import pyplot

# ``pandas.tools.plotting`` was relocated to ``pandas.plotting`` in pandas 0.20
# and the old path no longer exists in current releases; fall back to it only
# on old installations.
try:
    from pandas.plotting import lag_plot
except ImportError:
    from pandas.tools.plotting import lag_plot

# ``Series.from_csv`` has been removed from pandas. It read the file with the
# first column as a date-parsed index and returned the first remaining column;
# the read_csv call below spells out the same thing.
series = read_csv('wc98_workload_hour.csv', header=0, index_col=0,
                  parse_dates=True).iloc[:, 0]
lag_plot(series)
pyplot.show()
コード例 #47
0
pe_values = yspper10y['Value'].values

# Single-level Haar DWT of the 'Value' column.
# NOTE(review): 'cpd' looks like the legacy PyWavelets name for constant
# padding - confirm the installed version still accepts it.
yspper10ydwted = pywt.dwt(pe_values, "haar", mode="cpd")

# try different levels
yspper10dwtedcoeffs = pywt.wavedec(pe_values, 'haar', level=3)

# try maximum wavelet decomposition (replaces the level-3 result above)
yspper10dwtedcoeffs = pywt.wavedec(pe_values, 'haar')



#################################################################
## fun with lag_plots
#################################################################

lag_plot(yspper10y)
lag_title = "Lag plot of 1-year lag of "+yaleds[12].name
plt.title(lag_title, fontsize=12)

################################################################################ 
##### Is the U.S. stock market overvalued?
################################################################################ 

################################################################################ 
### Is the U.S. S&P 500 Price-to-Earnings (PE) ratio too expensive right now?
################################################################################

# EY : 20150910 in pandas, DataFrame.plot() serves as a wrapper around
# matplotlib's plt; see the pandas plotting tutorial.
# .plot() returns the Axes it drew on, so label that Axes directly. Calling
# fig.add_subplot(111) afterwards is fragile: depending on the matplotlib
# version it either returns the existing Axes or stacks a new, empty Axes on
# top of the plot, in which case the label ends up on the wrong one.
ax = yspper10y.plot()
fig = ax.figure
fig.suptitle( yaleds[12].name,fontsize=14,fontweight='bold')
ax.set_ylabel(yaleds[12].colname.split(',')[1] +' ;  P/E ratio')