예제 #1
0
def test_pipe_transformation():
    # SegmentX transform pipe
    pipe = Pype([('seg', SegmentX()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)

    # SegmentXY transform pipe
    pipe = Pype([('seg', SegmentXY()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)

    # Forecast transform pipe
    pipe = Pype([('seg', SegmentXYForecast()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)

    # Padtrunc transform pipe
    pipe = Pype([('trunc', PadTrunc()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)
예제 #2
0
def test_pipe_forecast():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]

    pipe = Pype([('seg', SegmentXYForecast()), ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    forecast_test(pipe, X, y)

    # context data, single time seres
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000)]

    forecast_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    forecast_test(pipe, X, y)

    # multiple time seres
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    forecast_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    forecast_test(pipe, X, y)

    # cross val

    Xt = np.array([np.random.rand(1000, 10)] * 5)
    Xc = np.random.rand(5, 3)
    X = TS_Data(Xt, Xc)
    y = np.array([np.random.rand(1000)] * 5)

    cross_validate(pipe, X, y, cv=3)

    X = pd.DataFrame(Xc)
    Xt = [np.random.rand(1000, 10)] * 5
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    cross_validate(pipe, X, y, cv=3)
예제 #3
0
from seglearn.split import temporal_split

X_train, X_else, y_train, y_else = train_test_split(df1_new, df1_new["sales_item_price_created"], test_size=0.2, shuffle=False)
X_valid, X_test, y_valid, y_test = train_test_split(X_else, y_else, test_size=0.5, shuffle=False)

#X_train, X_valid, y_train, y_valid = temporal_split(df1_new, df1_new["sales_item_price_created"], test_size=0.25)

#normalizers = minmax_scale(X_train, y_train)
#%% 

TIME_WINDOW=100
FORECAST_DISTANCE=24

from seglearn.transform import FeatureRep, SegmentXYForecast, last

segmenter = SegmentXYForecast(width=TIME_WINDOW, step=1, y_func=last, forecast=FORECAST_DISTANCE)

X_train_rolled, y_train_rolled,_=segmenter.fit_transform([X_train.values],[y_train])

X_train_rolled

#%%

X_train_rolled.shape

shape = X_train_rolled.shape
X_train_flattened = X_train_rolled.reshape(shape[0],shape[1]*shape[2])
X_train_flattened.shape


X_valid_rolled, y_valid_rolled,_=segmenter.fit_transform([X_valid.values],[y_valid])
예제 #4
0
# remember for a single time series, we need to make a list
X = [X]
y = [y]

# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y, test_size=0.25)

# create a feature representation pipeline
# setting y_func = last, and forecast = 200 makes us predict the value of y
# 200 samples ahead of the segment
# other reasonable options for y_func are ``mean``, ``all`` (or create your own function)
# see the API documentation for further details
clf = Pype([('segment',
             SegmentXYForecast(width=200,
                               overlap=0.5,
                               y_func=last,
                               forecast=200)), ('features', FeatureRep()),
            ('lin', LinearRegression())])

# fit and score
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Score: ", score)

# generate some predictions
y, y_p = clf.transform_predict(X, y)  # all predictions
예제 #5
0
# remember for a single time series, we need to make a list
X = [X]
y = [y]

# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y, test_size=0.25)

# create a feature representation pipeline
est = Pipeline([('features', FeatureRep()), ('lin', LinearRegression())])

# setting y_func = last, and forecast = 200 makes us predict the value of y
# 200 samples ahead of the segment
# other reasonable options for y_func are ``mean``, ``all`` (or create your own function)
# see the API documentation for further details
segmenter = SegmentXYForecast(width=200,
                              overlap=0.5,
                              y_func=last,
                              forecast=200)
pipe = SegPipe(est, segmenter)

# fit and score
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Score: ", score)

# generate some predictions
y, y_p = pipe.predict(X, y)  # all predictions
예제 #6
0
def test_pipe_forecast():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]
    est = Pipeline([('ftr', FeatureRep()), ('ridge', Ridge())])

    pipe = SegPipe(est, segmenter=SegmentXYForecast())

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # context data, single time seres
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000)]

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # multiple time seres
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # cross val

    Xt = np.array([np.random.rand(1000, 10) for i in range(5)])
    Xc = np.random.rand(5, 3)
    X = make_ts_data(Xt, Xc)
    y = np.array([np.random.rand(1000) for i in range(5)])

    cross_validate(pipe, X, y)

    # transform pipe
    est = Pipeline([('ftr', FeatureRep()), ('scaler', StandardScaler())])

    pipe = SegPipe(est, segmenter=SegmentXYForecast())

    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
예제 #7
0
def test_pipe_forecast():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]

    pipe = Pype([('seg', SegmentXYForecast()), ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # context data, single time seres
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000)]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # multiple time seres
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # cross val

    Xt = np.array([np.random.rand(1000, 10)] * 5)
    Xc = np.random.rand(5, 3)
    X = TS_Data(Xt, Xc)
    y = np.array([np.random.rand(1000)] * 5)

    cross_validate(pipe, X, y, cv=3)

    # transform pipe
    pipe = Pype([('seg', SegmentXYForecast()), ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])

    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
예제 #8
0
# with forecasting, X can include the target
X = np.stack([t, y], axis=1)

# remember for a single time series, we need to make a list
X = [X]
y = [y]

# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y, test_size=0.25)

# create a feature representation pipeline
# setting y_func = last, and forecast = 200 makes us predict the value of y
# 200 samples ahead of the segment
# other reasonable options for y_func are ``mean``, ``all`` (or create your own function)
# see the API documentation for further details
clf = Pype([('segment', SegmentXYForecast(width=200, overlap=0.5, y_func=last, forecast=200)),
            ('features', FeatureRep()),
            ('lin', LinearRegression())])

# fit and score
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Score: ", score)

# generate some predictions
y, y_p = clf.transform_predict(X, y)  # all predictions