Example #1
0
    def deserialize(self, item, force_bytes_to_unicode=False):
        """Rebuild a pandas Series from a NumPy record array.

        The index is recovered with ``self._index_from_records`` and the
        values are read from the last field of ``item``; the name of that
        field becomes the name of the Series.

        Parameters
        ----------
        item : numpy structured/record array
            Records holding the index field(s) followed by the value field.
        force_bytes_to_unicode : bool, optional
            When true, the values and every index level whose first element
            is ``bytes`` are cast with ``astype('unicode')``.

        Returns
        -------
        Series
        """
        index = self._index_from_records(item)
        name = item.dtype.names[-1]
        data = item[name]

        if force_bytes_to_unicode:
            if len(data) and isinstance(data[0], bytes):
                data = data.astype('unicode')

            if isinstance(index, MultiIndex):
                unicode_indexes = []
                # MultiIndex requires a conversion at each level.
                for level in range(len(index.levels)):
                    _index = index.get_level_values(level)
                    # An empty index has no first element to inspect; without
                    # the length guard ``_index[0]`` raises IndexError.
                    if len(_index) and isinstance(_index[0], bytes):
                        _index = _index.astype('unicode')
                    unicode_indexes.append(_index)
                index = unicode_indexes
            elif len(index) and isinstance(index[0], bytes):
                index = index.astype('unicode')

        # Series.from_array only exists in older pandas releases; newer ones
        # must go through the constructor.
        if PD_VER < '0.23.0':
            return Series.from_array(data, index=index, name=name)
        return Series(data, index=index, name=name)
Example #2
0
def draw_power_graphics(power,sp=None,before=None,after=None,band=100):
    """
    Draw the power values as a sequence of figures, ``band`` values per figure.

    power  : dataframe with the values of the power
    sp     : sequence of [array of x, array of y] pairs with the points to
            highlight in every figure (each pair is drawn with its own
            scatter call)
    before : keep only the rows whose 'time' is <= this date
    after  : keep only the rows whose 'time' is >= this date
    band   : how many values of power are drawn in each figure
    --------------------------------------------------------
    Note: this method uses the 'time' and 'value' columns and the dates must
        be passed in this format -> %Y-%m-%d %H:%M:%S
    Returns nothing; every figure is shown with pyplot.show().
    """
    tmp_power = power.copy()
    # Identity checks: ``!= None`` is evaluated element-wise on array-like
    # arguments and cannot be used as a plain condition.
    if before is not None:
        tmp_power = tmp_power[tmp_power['time'] <= before]
    if after is not None:
        tmp_power = tmp_power[tmp_power['time'] >= after]
    # Series.from_array was removed from pandas; the constructor is equivalent.
    serie = Series(tmp_power['value'])
    # range() rejects band == 0 instead of looping forever.
    for start in range(0, len(serie), band):
        stop = start + band
        pyplot.figure(figsize=(20, 5))
        pyplot.plot(serie[start:stop], 'ro')
        pyplot.axis([start, stop, 0, 1200])  # xmin, xmax, ymin, ymax
        if sp is not None:
            for points in sp:
                pyplot.scatter(points[0], points[1], marker=(0, 3))
        pyplot.show()
Example #3
0
def get_special_point(power,events,borders,eventName,numericValue):
    """
    Locate, for every occurrence of an event, the power sample in the middle
    of the time window around it, and print how many occurrences had data.

    power        : dataframe with the values of the power
    events       : dataframe with the events (and respective datetime)
    borders      : [min,MAX] how many seconds before and after the event are
                  included in the window
    eventName    : name of the event to consider (example: eventName="coffee")
    numericValue : value to give on the Y axis for each event
    --------------------------------------------------------
    Note: this method uses the 'time' and 'value' columns and the dates must
        be passed in this format -> %Y-%m-%d %H:%M:%S
    Returns (event_index, y_values): the index label of the middle power row
    of each non-empty window, and a list repeating numericValue once per label.
    """
    event_consider = events[events['eventName']==eventName].reset_index(drop=True)
    event_index = []
    for stamp in event_consider['time']:
        date = time.mktime(datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S").timetuple())
        start = str(datetime.fromtimestamp(date - borders[0]))
        end = str(datetime.fromtimestamp(date + borders[1]))
        in_window = (power['time'] >= start) & (power['time'] <= end)
        # Series.from_array was removed from pandas; the constructor is equivalent.
        serie = Series(power[in_window]['value'])
        if len(serie) > 0:
            event_index.append(serie.index[len(serie) // 2])
    print("number of", eventName ,"in groudtruth and power=",len(event_index))
    return event_index,[numericValue]*len(event_index)
Example #4
0
def draw_around_event(power,events,borders,eventName,maxY=1200):
    """
    Draw one figure per occurrence of an event, showing the power levels in
    the time window around that occurrence.

    power     : dataframe with the values of the power
    events    : dataframe with the events (and respective datetime)
    borders   : [min,MAX] how many seconds before and after the event are
                included in the window
    eventName : name of the event to consider
    maxY      : upper limit of the Y (power) axis
    --------------------------------------------------------
    Note: this method uses the 'time' and 'value' columns and the dates must
        be passed in this format -> %Y-%m-%d %H:%M:%S
    Returns nothing; figures are shown with pyplot.show() and a message is
    printed for occurrences without power data.
    """
    event_consider = events[events['eventName']==eventName].reset_index(drop=True)
    print("number of", eventName ,"in groudtruth=",len(event_consider))
    for stamp in event_consider['time']:
        date = time.mktime(datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S").timetuple())
        start = str(datetime.fromtimestamp(date - borders[0]))
        end = str(datetime.fromtimestamp(date + borders[1]))
        print(date,start,end)
        in_window = (power['time'] >= start) & (power['time'] <= end)
        # Series.from_array was removed from pandas; the constructor is equivalent.
        serie = Series(power[in_window]['value'])
        if len(serie) > 0:
            # xmin, xmax, ymin, ymax
            v = [serie.index[0], serie.index[-1], 0, maxY]
            pyplot.figure(figsize=(20, 5))
            pyplot.plot(serie, 'ro')
            pyplot.axis(v)
            pyplot.show()
        else:
            print("No data of power for this event")
Example #5
0
    def test_from_M8_structured(self):
        dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))]
        arr = np.array(dates,
                       dtype=[('Date', 'M8[us]'), ('Forecasting', 'M8[us]')])
        df = DataFrame(arr)

        assert df['Date'][0] == dates[0][0]
        assert df['Forecasting'][0] == dates[0][1]

        s = Series(arr['Date'])
        assert isinstance(s[0], Timestamp)
        assert s[0] == dates[0][0]

        s = Series.from_array(arr['Date'], Index([0]))
        assert s[0] == dates[0][0]
Example #6
0
    def test_from_M8_structured(self):
        """Datetime64 fields of a structured array survive DataFrame/Series
        construction, and the deprecated Series.from_array still warns."""
        first, second = datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10)
        records = np.array(
            [(first, second)],
            dtype=[("Date", "M8[us]"), ("Forecasting", "M8[us]")],
        )
        frame = DataFrame(records)

        assert frame["Date"][0] == first
        assert frame["Forecasting"][0] == second

        date_field = records["Date"]
        plain = Series(date_field)
        assert isinstance(plain[0], Timestamp)
        assert plain[0] == first

        # The legacy constructor must emit a FutureWarning while still
        # producing the same value.
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            legacy = Series.from_array(date_field, Index([0]))
            assert legacy[0] == first
    def test_from_M8_structured(self):
        dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))]
        arr = np.array(dates,
                       dtype=[('Date', 'M8[us]'), ('Forecasting', 'M8[us]')])
        df = DataFrame(arr)

        assert df['Date'][0] == dates[0][0]
        assert df['Forecasting'][0] == dates[0][1]

        s = Series(arr['Date'])
        assert isinstance(s[0], Timestamp)
        assert s[0] == dates[0][0]

        s = Series.from_array(arr['Date'], Index([0]))
        assert s[0] == dates[0][0]
    def test_from_M8_structured(self):
        dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))]
        arr = np.array(dates,
                       dtype=[('Date', 'M8[us]'), ('Forecasting', 'M8[us]')])
        df = DataFrame(arr)

        self.assertEqual(df['Date'][0], dates[0][0])
        self.assertEqual(df['Forecasting'][0], dates[0][1])

        s = Series(arr['Date'])
        self.assertTrue(s[0], Timestamp)
        self.assertEqual(s[0], dates[0][0])

        s = Series.from_array(arr['Date'], Index([0]))
        self.assertEqual(s[0], dates[0][0])
Example #9
0
    def test_from_M8_structured(self):
        """Datetime64 fields of a structured array survive DataFrame/Series
        construction, and the deprecated Series.from_array still warns."""
        expected_date = datetime(2012, 9, 9, 0, 0)
        expected_forecast = datetime(2012, 9, 8, 15, 10)
        field_types = [('Date', 'M8[us]'), ('Forecasting', 'M8[us]')]
        structured = np.array([(expected_date, expected_forecast)],
                              dtype=field_types)
        frame = DataFrame(structured)

        assert frame['Date'][0] == expected_date
        assert frame['Forecasting'][0] == expected_forecast

        from_ctor = Series(structured['Date'])
        assert isinstance(from_ctor[0], Timestamp)
        assert from_ctor[0] == expected_date

        # The legacy constructor is expected to raise a FutureWarning while
        # still yielding the same first value.
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            from_legacy = Series.from_array(structured['Date'], Index([0]))
            assert from_legacy[0] == expected_date
Example #10
0
def previsao_matematica(reservatId, data):
    """Forecast 180 steps of the series for ``reservatId`` with an ARIMA(1,0,1) model.

    The series returned by ``predict_info.getSeries(reservatId, data)`` is
    differenced with lag 1, the model is fitted on the differences, and each
    forecast step is inverted back with ``predict_info.inverse_difference``.

    Returns a dict with:
      'calculado' : True once the forecast has been produced
      'volumes'   : the non-negative inverted forecasts, formatted as "%.4f"
      'dias'      : how many values were appended to 'volumes'
    """
    # Series.from_array was removed from pandas; the constructor is equivalent.
    seriesValues = Series(predict_info.getSeries(reservatId, data)).values

    mathDict = {'calculado': False, 'volumes': [], 'dias': 0}

    # NOTE: a stationarity check (isNonStationary) was once intended here;
    # differencing is currently always applied.
    days_in_year = 1
    differenced = predict_info.difference(seriesValues, days_in_year)
    # fit model
    model = ARIMA(differenced, order=(1,0,1))
    model_fit = model.fit(disp = -1)
    # multi-step out-of-sample forecast
    forecast = model_fit.forecast(steps=180)[0]
    # invert the differenced forecast to something usable
    mathDict['calculado'] = True
    history = list(seriesValues)
    for yhat in forecast:
        inverted = predict_info.inverse_difference(history, yhat, days_in_year)
        # Every inverted value feeds the next inversion, even negative ones
        # that are not reported.
        history.append(inverted)
        if inverted >= 0.0:
            mathDict['volumes'].append("%.4f" % round((inverted), 4))
            mathDict['dias'] += 1
    return mathDict
Example #11
0
def previsao_matematica(reservatId, data):
    """Produce a 180-step ARIMA(1, 0, 1) forecast for the given reservoir id.

    Values come from ``predict_info.getSeries(reservatId, data)``. They are
    differenced with lag 1 before fitting, and every forecast step is mapped
    back through ``predict_info.inverse_difference``.

    Returns a dict with keys 'calculado' (set to True after forecasting),
    'volumes' (non-negative inverted forecasts as "%.4f" strings) and 'dias'
    (the number of entries in 'volumes').
    """
    # The removed Series.from_array helper is replaced by the constructor.
    seriesArray = Series(predict_info.getSeries(reservatId, data))
    seriesValues = seriesArray.values

    mathDict = {'calculado': False, 'volumes': [], 'dias': 0}

    # NOTE: a stationarity check (isNonStationary) was once intended here;
    # differencing is currently unconditional.
    days_in_year = 1
    differenced = predict_info.difference(seriesValues, days_in_year)
    # fit model
    model = ARIMA(differenced, order=(1, 0, 1))
    model_fit = model.fit(disp=-1)
    # multi-step out-of-sample forecast
    forecast = model_fit.forecast(steps=180)[0]
    # invert the differenced forecast to something usable
    mathDict['calculado'] = True
    history = list(seriesValues)
    for yhat in forecast:
        inverted = predict_info.inverse_difference(history, yhat, days_in_year)
        # Negative values are skipped in the output but still extend the
        # history used by the following inversions.
        history.append(inverted)
        if inverted >= 0.0:
            mathDict['volumes'].append("%.4f" % round((inverted), 4))
            mathDict['dias'] += 1
    return mathDict
Example #12
0
 def deserialize(self, item):
     """Rebuild a Series from a record array.

     The index comes from ``self._index_from_records``; the values and the
     Series name come from the last field of ``item``.
     """
     index = self._index_from_records(item)
     name = item.dtype.names[-1]
     # Series.from_array was removed in pandas 1.0; the constructor takes the
     # same arguments.
     return Series(item[name], index=index, name=name)
Example #13
0
 def from_records(self, recarr):
     """Rebuild a Series from a record array.

     The index comes from ``self._index_from_records``; the values and the
     Series name come from the last field of ``recarr``.
     """
     index = self._index_from_records(recarr)
     name = recarr.dtype.names[-1]
     # Series.from_array was removed in pandas 1.0; the constructor takes the
     # same arguments.
     return Series(recarr[name], index=index, name=name)
 def from_records(self, recarr):
     """Convert a record array into a Series named after its last field.

     ``self._index_from_records`` supplies the index; the last field of
     ``recarr`` supplies the values.
     """
     index = self._index_from_records(recarr)
     name = recarr.dtype.names[-1]
     # The removed Series.from_array helper is replaced by the constructor,
     # which accepts the same keyword arguments.
     return Series(recarr[name], index=index, name=name)
Example #15
0
tot[:10]

# ## Export or print data

# In[12]:

import sys
# Context manager so the input handle is closed once the frame is loaded.
with open('F:/电影/数据分析/pydata-book-master/ch06/ex6.csv') as file6:
    data = pd.read_csv(file6)
data.to_csv('F:/电影/数据分析/pydata-book-master/ch06/ooo.csv')
#data.to_csv(sys.stdout, na_rep='NULL',index=False,column=False)
# data.to_csv(sys.stdout, sep='|')

# In[13]:

# NOTE(review): the original passed this path to Series.from_array, which
# wraps the path string itself instead of reading the file (and no longer
# exists in pandas >= 1.0). Presumably the intent was to load the CSV as a
# Series, as the old Series.from_csv did - this assumes a header-less file
# whose first column is the index; confirm against the actual file.
kk = pd.read_csv('F:/电影/数据分析/pydata-book-master/ch06/tseries.csv',
                 header=None, index_col=0, parse_dates=True).iloc[:, 0]
kk

# ## Handle the delimiter manually

# In[14]:

import csv
# newline='' is what the csv module documentation asks for; the handle stays
# open only while the reader is being consumed.
with open('F:/电影/数据分析/pydata-book-master/ch06/ex7.csv', newline='') as f:
    reader = csv.reader(f)

    # In[15]:

    for line in reader:
        print(line)
Example #16
0
def chapter_2():
    """Run a series of printed NumPy / SciPy / pandas demonstrations.

    In order, the function prints: the determinant, inverse, product with the
    inverse and eigen-decomposition of a seeded random 4x4 matrix; a Newton
    root and a Brent minimisation of x**2 + 2x + 1; a labelled Series; and
    several DataFrame operations (column selection, filtering, drop, merge,
    group-by mean).

    The plotting helpers defined at the end (scatter, continuous,
    sin_and_cos, hist, monte_carlo) are only defined here; nothing in this
    function calls them. Returns nothing.

    NOTE(review): ``random`` and ``linalg`` are presumably numpy.random and a
    NumPy/SciPy linalg module - confirm against the file's imports.
    """
    random.seed(0)
    a = random.randn(16).reshape(4, 4) * 10
    print(linalg.det(a))
    print(linalg.inv(a))
    print(np.dot(a, linalg.inv(a)))
    [eig_value, eig_vector] = linalg.eig(a)
    print(eig_value)
    print(eig_vector)

    def sample_func(x):
        return x**2 + 2 * x + 1

    print(sp.optimize.newton(sample_func, 0))
    print(minimize_scalar(sample_func, method="Brent"))

    # Series.from_array was removed in pandas 1.0; the constructor takes the
    # same data/index arguments.
    sample_pandas_data = Series(
        [12, 23, 34, 45, 56, 67, 78, 89, 90, 121],
        index=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'])
    print(sample_pandas_data)

    attri_data1 = {
        'ID': ['100', '101', '102', '103', '104'],
        'city': ['Tokyo', 'Osaka', 'Kyoto', 'Hokkaidao', 'Tokyo'],
        'birth_year': [1990, 1989, 1992, 1997, 1982],
        'name': ['Hiroshi', 'Akiko', 'Yuki', 'Satoru', 'Steeve']
    }

    attri_data_frame1 = DataFrame(attri_data1, index=["a", "b", "c", "d", "e"])
    print(attri_data_frame1)
    print(attri_data_frame1[["ID", "city"]].T)
    print(attri_data_frame1[attri_data_frame1['city'].isin(["Tokyo",
                                                            "Osaka"])])
    print(attri_data_frame1.drop(['birth_year'], axis=1))

    attri_data2 = {
        'ID': ['100', '101', '102', '105', '107'],
        'math': [50, 43, 33, 76, 98],
        'English': [90, 30, 20, 50, 30],
        'sex': ['M', 'F', 'F', 'M', 'M']
    }

    attri_data_frame2 = DataFrame.from_dict(attri_data2)
    print(attri_data_frame2)

    # Outer join on the column the two frames share.
    print(pd.merge(attri_data_frame1, attri_data_frame2, how="outer"))
    print(attri_data_frame2.groupby("sex")["math"].mean())

    def scatter():
        # Scatter plot
        random.seed(0)
        x = np.random.randn(300)
        y = np.sin(x) + np.random.randn(300)

        plt.plot(x, y, "o")
        # plt.scatter(x, y)

        plt.title("Title Name")
        plt.xlabel("X")
        plt.ylabel("Y")
        plt.grid(True)
        plt.show()

    def continuous():
        # Continuous curve
        np.random.seed(0)
        numpy_data_x = np.arange(1000)

        numpy_random_data_y = np.random.randn(1000).cumsum()

        plt.plot(numpy_data_x, numpy_random_data_y, label="Label")
        plt.legend()

        plt.xlabel("X")
        plt.ylabel("Y")
        plt.grid(True)
        plt.show()

    def sin_and_cos():
        # 2x2 grid of sine curves: sin(x) and sin(2x), each drawn twice.
        plt.subplot(2, 2, 1)
        x1 = np.linspace(-10, 10, 100)
        plt.plot(x1, np.sin(x1))

        plt.subplot(2, 2, 2)
        x2 = np.linspace(-10, 10, 100)
        plt.plot(x2, np.sin(2 * x2))

        plt.subplot(2, 2, 3)
        x3 = np.linspace(-10, 10, 100)
        plt.plot(x3, np.sin(x3))

        plt.subplot(2, 2, 4)
        x4 = np.linspace(-10, 10, 100)
        plt.plot(x4, np.sin(2 * x4))

        plt.grid(True)
        plt.show()

    def hist():
        # Three stacked histograms: one normal sample, two uniform samples.
        random.seed(0)
        plt.subplot(3, 1, 1)
        plt.hist(np.random.randn(10**5) * 10 + 50, bins=60, range=(20, 80))

        plt.subplot(3, 1, 2)
        plt.hist(random.uniform(0.0, 1.0, 1000), bins=100)

        plt.subplot(3, 1, 3)
        plt.hist(random.uniform(0.0, 1.0, 1000), bins=100)

        plt.grid(True)
        plt.show()

    def monte_carlo():
        # Estimate pi from the share of uniform points inside the unit circle.
        random.seed(0)
        n = 1000000
        x = random.uniform(-1.0, 1.0, n)
        y = random.uniform(-1.0, 1.0, n)

        r = np.sqrt(x**2 + y**2)
        mask = r < 1
        print("pi =", np.sum(mask) * 4 / n)

        plt.subplot(2, 1, 1)
        plt.scatter(x[mask], y[mask])

        plt.subplot(2, 1, 2)
        plt.scatter(x[mask == 0], y[mask == 0])

        plt.show()
Example #17
0
 def deserialize(self, item, _force_bytes_to_unicode=False):
     """Rebuild a Series from a record array.

     The index comes from ``self._index_from_records``; the values and the
     Series name come from the last field of ``item``.
     ``_force_bytes_to_unicode`` is accepted but not used by this method.
     """
     index = self._index_from_records(item)
     name = item.dtype.names[-1]
     # Series.from_array was removed in pandas 1.0; the constructor takes the
     # same arguments.
     return Series(item[name], index=index, name=name)
Example #18
0
print(cleaned)

# coeff[comp1][comp2] holds, for every pair where comp1 precedes comp2 in
# s_and_p: (correlation of the two series over the first 755 points,
# first element of the adfuller result for their difference,
# new_s_and_p[comp1][1]).
coeff = {}

for comp1 in new_s_and_p:
    # Position of comp1 does not change inside the inner loop.
    position1 = s_and_p.index(comp1)
    coeffs = {}
    for comp2 in new_s_and_p:
        if position1 < s_and_p.index(comp2):
            values1 = new_s_and_p[comp1][0]
            values2 = new_s_and_p[comp2][0]
            nums = [values1[i] - values2[i] for i in range(len(values1))]
            # Series.from_array was removed from pandas; the constructor is
            # equivalent.
            series = Series(nums[0:755])
            X = series.values
            result = adfuller(X)
            coeffs[comp2] = (np.corrcoef(values1[0:755], values2[0:755])[0][1], result[0], new_s_and_p[comp1][1])

    coeff[comp1] = coeffs

print("Done with Coeffs. Now selecting pairs.")
# Result buckets for the selected pairs. NOTE(review): the names suggest
# correlation ranges in percent (0-50, 50-80, ...) - the code that fills them
# is not visible here, confirm.
pairs_0_50 = []
pairs_50_80 = []
pairs_80_90 = []
pairs_90_97 = []
pairs_97_100 = []
for comp1 in coeff.keys():
    for comp2 in coeff[comp1].keys():
        # Entry layout is (correlation, first adfuller result element, ...):
        # this branch requires index 1 below -2.579 and index 0 below 0.5.
        if coeff[comp1][comp2][1]<-2.579 and coeff[comp1][comp2][0] <.5:
Example #19
0
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
#from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error
#from sklearn.utils import check_array
from sklearn.grid_search import GridSearchCV
#from sklearn.model_selection import cross_val_score
from sklearn.decomposition import PCA
from sklearn import preprocessing
import matplotlib.pyplot as plt

# Load the three input files.
df = pandas.read_csv("C:/Users/user/Desktop/Drchen/Cap8/capsule8.csv")
df1 = pandas.read_excel("C:/Users/user/Desktop/Drchen/Cap8/cap8.xlsx")
df2 = pandas.read_excel("C:/Users/user/Desktop/Drchen/Cap8/tcp_life.xlsx")

# Series.from_array was removed in pandas 1.0; the constructor is equivalent.
x = Series(df['ts'])
y = Series(df['depth'])
#binwidth = 200

# 1.time-series & histogram for ts
x.hist()
x.plot()

#time-series & histogram for depth
y.hist()
plt.plot(x, y)

plot_1 = plt.plot(df['ts'])
plot_2 = plt.plot(df['ts'], df['depth'])

#time - series & histogram for rate_1 & rate_5
Example #20
0

# The first 891 rows of full_set are the labelled rows; the rest is the test set.
#train_valid_X = full_set_X[0:891]
train_valid_X = full_set[0:891]
train_valid_y = targets

#test_X = full_set_X[891:]
test_X = full_set[891:]

# 70 % of the labelled rows are used for training, the rest for validation.
train_X, valid_X, train_y, valid_y = train_test_split (train_valid_X, train_valid_y, train_size = .7)

print(full_set.shape, train_X.shape, valid_X.shape, train_y.shape, valid_y.shape, test_X.shape)

from pandas import Series
from matplotlib import pyplot
# Histogram of the 'Age' column. Series.from_array was removed in pandas 1.0;
# the constructor is equivalent.
series = Series(train['Age'])
series.hist()
pyplot.show()


cl = RandomForestClassifier(n_estimators=100)
#cl = RandomForestClassifier(n_estimators=50, max_features='sqrt')
#cl = GaussianNB()
#cl = LogisticRegression()
#cl = KNeighborsClassifier(n_neighbors=3)
#cl = GradientBoostingClassifier()
#cl = SVC()


#import xgboost as xgb
Example #21
0
from pandas import Series
from pandas import DataFrame
from pandas import TimeGrouper
from matplotlib import pyplot

import numpy as np
import pandas as pd

pdf = engineSample50cycleWindow.values


# COMMAND ----------



# Series.from_array was removed in pandas 1.0; the constructor is equivalent.
series = Series(pdf)
# NOTE(review): TimeGrouper has also been removed from current pandas
# (pd.Grouper(freq=...) replaces it); the intended frequency for the
# argument 1 is unclear, so the call is left as is - confirm.
groups = series.groupby(TimeGrouper(1))
years = DataFrame()

# One column per group, keyed by the year of the group label, then transposed
# so that every year becomes a row of the matrix plot.
for name, group in groups:
    years[name.year] = group.values
years = years.T
pyplot.matshow(years, interpolation=None, aspect='auto')
pyplot.show()

# COMMAND ----------

import matplotlib.pyplot as plt
dataSet = renamed_df.toPandas()

fig, ax = plt.subplots()
Example #22
0
 def deserialize(self, item):
     """Convert a record array into a Series named after its last field.

     ``self._index_from_records`` supplies the index; the last field of
     ``item`` supplies the values.
     """
     index = self._index_from_records(item)
     name = item.dtype.names[-1]
     # The removed Series.from_array helper is replaced by the constructor,
     # which accepts the same keyword arguments.
     return Series(item[name], index=index, name=name)
Example #23
0
 def deserialize(self, item, _force_bytes_to_unicode=False):
     """Convert a record array into a Series named after its last field.

     ``self._index_from_records`` supplies the index; the last field of
     ``item`` supplies the values. ``_force_bytes_to_unicode`` is accepted
     but not used by this method.
     """
     index = self._index_from_records(item)
     name = item.dtype.names[-1]
     # The removed Series.from_array helper is replaced by the constructor,
     # which accepts the same keyword arguments.
     return Series(item[name], index=index, name=name)