def test_make_forecasting_frame_feature_extraction(self):
    """Smoke test: a forecasting frame can be fed to feature extraction.

    Builds the frame from a four-point hourly series and hands the result to
    ``extract_relevant_features``. Nothing is asserted; the test passes when
    no exception is raised.
    """
    hourly_index = pd.date_range('1/1/2011', periods=4, freq='H')
    series = pd.Series(data=range(4), index=hourly_index)

    frame, target = dataframe_functions.make_forecasting_frame(
        x=series,
        kind="test",
        max_timeshift=1,
        rolling_direction=1,
    )

    extract_relevant_features(
        frame,
        target,
        column_id="id",
        column_sort="time",
        column_value="value",
        default_fc_parameters=MinimalFCParameters(),
    )
def test_make_forecasting_frame_list(self):
    """Check the frame and target produced from a plain ``range`` input."""
    frame, target = dataframe_functions.make_forecasting_frame(
        x=range(4),
        kind="test",
        max_timeshift=1,
        rolling_direction=1,
    )

    wanted_frame = pd.DataFrame({
        "id": [1, 2, 3],
        "kind": ["test"] * 3,
        "value": [0., 1., 2.],
        "time": [0., 1., 2.],
    })
    wanted_target = pd.Series(data=[1, 2, 3], index=[1, 2, 3], name="value")

    # Column order is not part of the contract, so compare with sorted columns.
    assert_frame_equal(frame.sort_index(axis=1),
                       wanted_frame.sort_index(axis=1))
    assert_series_equal(target, wanted_target)
def compute_tsfresh_features(self):
    """Extract `tsfresh` features from ``self.df[self.ts_col]`` and filter them.

    The column is rolled into a forecasting frame, features are extracted
    and imputed, constant columns are dropped, and the remainder is passed
    through ``select_features`` as a regression task. Timings and feature
    counts are reported through ``tqdm.write``.

    Returns:
        The feature frame returned by ``select_features``.
    """
    series = self.df[self.ts_col]
    rolled, target = make_forecasting_frame(
        series,
        kind="kind",
        max_timeshift=self.max_timeshift,
        rolling_direction=1,
    )

    t0 = time.time()
    raw_features = extract_features(
        rolled,
        column_id="id",
        column_sort="time",
        column_value="value",
        impute_function=impute,
        n_jobs=8,
        show_warnings=False,
    )
    t1 = time.time()
    tqdm.write("Extraction time: {}".format(t1 - t0))

    # A column with exactly one distinct value carries no information.
    is_varying = raw_features.apply(pd.Series.nunique) != 1
    varying_features = raw_features.loc[:, is_varying]

    t2 = time.time()
    selected = select_features(varying_features, target, ml_task='regression')
    t3 = time.time()

    tqdm.write("Filtering time: {}".format(t3 - t2))
    tqdm.write("Raw features: {}".format(raw_features.shape[1]))
    tqdm.write("Non-constant features: {}".format(varying_features.shape[1]))
    tqdm.write("Final filtered features: {}".format(selected.shape[1]))
    return selected
def transform(self, X, y=None):
    """Turn the ``"price"`` column of ``X`` into a forecasting frame.

    The incoming ``y`` argument is ignored; the returned target comes from
    ``make_forecasting_frame``.

    Returns:
        The ``(frame, target)`` pair produced by ``make_forecasting_frame``.
    """
    shift_count = self.determine_timeshift_count(X)
    frame, target = make_forecasting_frame(
        X["price"],
        kind="price",
        max_timeshift=shift_count,
        rolling_direction=1,
    )
    return frame, target
def test_make_forecasting_frame_pdSeries(self):
    """Check the frame and target produced from a datetime-indexed Series."""
    source = pd.Series(
        data=range(4),
        index=pd.date_range('1/1/2011', periods=4, freq='H'),
    )
    frame, target = dataframe_functions.make_forecasting_frame(
        x=source,
        kind="test",
        max_timeshift=1,
        rolling_direction=1,
    )

    # Timestamps of the predicted points (used as ids) and of the inputs.
    predicted_at = pd.DatetimeIndex([
        "2011-01-01 01:00:00",
        "2011-01-01 02:00:00",
        "2011-01-01 03:00:00",
    ])
    observed_at = pd.DatetimeIndex([
        "2011-01-01 00:00:00",
        "2011-01-01 01:00:00",
        "2011-01-01 02:00:00",
    ])

    wanted_target = pd.Series(data=[1, 2, 3], index=predicted_at, name="value")
    wanted_frame = pd.DataFrame({
        "id": predicted_at,
        "kind": ["test"] * 3,
        "value": [0., 1., 2.],
        "time": observed_at,
    })

    assert_frame_equal(frame.sort_index(axis=1),
                       wanted_frame.sort_index(axis=1))
    assert_series_equal(target, wanted_target)
def main():
    """Build rolling tsfresh features per ``Name`` group and write them to CSV.

    Reads the hard-coded Excel workbook (header on the second row), replaces
    missing cells with 0, and for every ``Name`` group with more than one row
    builds a forecasting frame over ``FantPt`` indexed by ``Year``. Features
    are then extracted per frame, labelled with the group name, concatenated
    and written to ``features_time_series.csv``.
    """
    files = pd.read_excel(
        '/home/velaraptor/Downloads/Raw Data 10yrs (2018).xlsx', header=1)
    files = files.fillna(0)

    # Keep the group name next to its frame so the label does not have to be
    # recovered from the rolled frame afterwards.
    forecast_frames = []
    for name, group in tqdm.tqdm(files.groupby('Name')):
        if len(group) > 1:
            group.index = group.Year
            df_shift, _ = make_forecasting_frame(group["FantPt"],
                                                 kind=name,
                                                 max_timeshift=10,
                                                 rolling_direction=1)
            forecast_frames.append((name, df_shift))

    features_df = []
    for name, sample in tqdm.tqdm(forecast_frames):
        X = extract_features(sample,
                             column_id="id",
                             column_sort="time",
                             column_value="value",
                             impute_function=impute,
                             show_warnings=False,
                             disable_progressbar=True,
                             default_fc_parameters=EfficientFCParameters())
        X = X.reset_index()
        # Assign the label as a scalar. Assigning ``sample['kind']`` aligned a
        # long rolled-frame Series against the feature index and could leave
        # NaN (or fail) whenever the two indices did not line up.
        X['Name'] = name
        features_df.append(X)

    features_time_series = pd.concat(features_df)
    features_time_series.to_csv('features_time_series.csv', index=False)
def fit_rolling_auto_sklearn(y_train, max_timeshift=10, rolling_direction=1,
                             params=None, my_dict_of_features=None):
    """Fit an auto-sklearn regressor on rolling tsfresh features plus lags.

    Args:
        y_train: One-dimensional training series.
        max_timeshift: Window length passed to ``make_forecasting_frame``.
        rolling_direction: Rolling direction passed to
            ``make_forecasting_frame``.
        params: Unused; kept so existing callers keep working.
        my_dict_of_features: tsfresh feature settings forwarded as
            ``default_fc_parameters``.

    Returns:
        Tuple ``(automl, last_exog, predict_in_sample)``: the fitted model,
        the single feature row for the step after the training data, and the
        in-sample predictions.

    Note:
        ``tmp_folder`` and ``output_folder`` are not parameters; they are
        read from the enclosing module scope.
    """
    # Lag columns over the series padded with one trailing zero, so the last
    # row describes the step that follows the final observation.
    padded = np.concatenate([y_train, [0]])
    exog_lag = np.hstack([
        shift(padded, shift=lag, cval=0.0).reshape(-1, 1)
        for lag in (1, 2, 3, 12)
    ])

    df_shift, _ = make_forecasting_frame(y_train,
                                         kind="price",
                                         max_timeshift=max_timeshift,
                                         rolling_direction=rolling_direction)
    X_train = extract_features(df_shift,
                               column_id="id",
                               column_sort="time",
                               column_value="value",
                               impute_function=impute,
                               show_warnings=False,
                               default_fc_parameters=my_dict_of_features,
                               disable_progressbar=True)
    X_train = np.array(X_train.dropna(axis=1))

    ts = y_train[2:]
    exog = np.hstack((X_train[:-1], exog_lag[2:-1]))
    last_exog = np.concatenate([X_train[-1], exog_lag[-1]]).reshape(1, -1)

    # One entry per actual feature column. This was hard-coded to 8, which
    # only matched one particular feature configuration.
    feature_types = ['numerical'] * exog.shape[1]

    automl = autosklearn.regression.AutoSklearnRegressor(
        time_left_for_this_task=1200,
        per_run_time_limit=120,
        ml_memory_limit=2048,
        tmp_folder=tmp_folder,
        output_folder=output_folder,
        initial_configurations_via_metalearning=0,
    )
    automl.fit(exog,
               ts,
               dataset_name='airlines',
               feat_type=feature_types,
               metric=autosklearn.metrics.mean_squared_error)
    predict_in_sample = automl.predict(exog)

    print(automl.show_models())
    print('\nStatistics: \n', automl.sprint_statistics())
    return automl, last_exog, predict_in_sample
def test_make_forecasting_frame_range(self):
    """Check the frame and target produced from a NumPy range input.

    Ids are expected to be ``("id", n)`` tuples in this variant of the test.
    """
    frame, target = dataframe_functions.make_forecasting_frame(
        x=np.arange(4),
        kind="test",
        max_timeshift=1,
        rolling_direction=1,
    )

    wanted_ids = [("id", number) for number in np.arange(1, 4)]
    wanted_frame = pd.DataFrame({
        "id": wanted_ids,
        "kind": ["test"] * 3,
        "value": np.arange(3),
        "time": [0, 1, 2],
    })
    wanted_target = pd.Series(
        data=[1, 2, 3],
        index=[("id", number) for number in (1, 2, 3)],
        name="value",
    )

    # The produced frame keeps its own row labels; compare positionally.
    produced = frame.sort_index(axis=1).reset_index(drop=True)
    assert_frame_equal(produced, wanted_frame.sort_index(axis=1))
    assert_series_equal(target, wanted_target)
def predict_rolling(model, last_exog, y_train, forecast_horizont,
                    max_timeshift=10, rolling_direction=1,
                    my_dict_of_features=None):
    """Predict the next ``forecast_horizont`` values one step at a time.

    The first value is predicted from ``last_exog``. Every later step appends
    the previous prediction to the series, rebuilds the tsfresh and lag
    features, and predicts from the newest feature row.

    Args:
        model: Fitted estimator exposing ``predict``.
        last_exog: Feature row (shape ``(1, n_features)``) for the first step.
        y_train: One-dimensional series the model was trained on.
        forecast_horizont: Number of steps to predict.
        max_timeshift: Window length passed to ``make_forecasting_frame``.
        rolling_direction: Rolling direction passed to
            ``make_forecasting_frame``.
        my_dict_of_features: tsfresh feature settings forwarded as
            ``default_fc_parameters``.

    Returns:
        ``numpy.ndarray`` of length ``forecast_horizont``; empty when the
        horizon is 0.
    """
    predictions = np.empty(forecast_horizont)
    if forecast_horizont == 0:
        # Nothing to predict; indexing position 0 would raise IndexError.
        return predictions

    # ``predict`` returns an array; take the single value explicitly rather
    # than relying on implicit array-to-scalar conversion.
    predictions[0] = np.ravel(model.predict(last_exog))[0]

    for it in range(1, forecast_horizont):
        y_train = np.append(y_train, predictions[it - 1])

        # Lag columns over the series padded with one trailing zero, so the
        # last row describes the step being predicted.
        padded = np.concatenate([y_train, [0]])
        exog_lag = np.hstack([
            shift(padded, shift=lag, cval=0.0).reshape(-1, 1)
            for lag in (1, 2, 3, 12)
        ])

        df_shift, _ = make_forecasting_frame(
            y_train,
            kind="price",
            max_timeshift=max_timeshift,
            rolling_direction=rolling_direction)
        X_train = extract_features(df_shift,
                                   default_fc_parameters=my_dict_of_features,
                                   column_id="id",
                                   column_sort="time",
                                   disable_progressbar=True,
                                   column_value="value",
                                   impute_function=impute,
                                   show_warnings=False)
        X_train = np.array(X_train.dropna(axis=1))

        exog = np.concatenate([X_train[-1], exog_lag[-1]]).reshape(1, -1)
        predictions[it] = np.ravel(model.predict(exog))[0]

    return predictions
def test_make_forecasting_frame_range(self):
    """Check the frame produced from a NumPy range input.

    Only the frame is compared; the returned target is not asserted.
    """
    frame, _target = dataframe_functions.make_forecasting_frame(
        x=np.arange(4),
        kind="test",
        max_timeshift=1,
        rolling_direction=1,
    )

    wanted_columns = {
        "id": [1, 2, 3],
        "kind": ["test"] * 3,
        "value": [0., 1., 2.],
        "time": [0., 1., 2.],
    }
    wanted_frame = pd.DataFrame(wanted_columns)

    assert_frame_equal(frame.sort_index(axis=1),
                       wanted_frame.sort_index(axis=1))
def test_make_forecasting_frame_pdSeries(self):
    """Check the frame and target produced from a datetime-indexed Series."""
    t_index = pd.date_range('1/1/2011', periods=4, freq='H')
    hourly_series = pd.Series(data=range(4), index=t_index)

    frame, target = dataframe_functions.make_forecasting_frame(
        x=hourly_series, kind="test", max_timeshift=1, rolling_direction=1)

    id_stamps = ["2011-01-01 01:00:00",
                 "2011-01-01 02:00:00",
                 "2011-01-01 03:00:00"]
    time_stamps = ["2011-01-01 00:00:00",
                   "2011-01-01 01:00:00",
                   "2011-01-01 02:00:00"]

    wanted_target = pd.Series(data=[1, 2, 3],
                              index=pd.DatetimeIndex(id_stamps),
                              name="value")
    wanted_frame = pd.DataFrame({
        "id": pd.DatetimeIndex(id_stamps),
        "kind": ["test"] * 3,
        "value": [0., 1., 2.],
        "time": pd.DatetimeIndex(time_stamps),
    })

    assert_frame_equal(frame.sort_index(axis=1),
                       wanted_frame.sort_index(axis=1))
    assert_series_equal(target, wanted_target)
def test_make_forecasting_frame_range(self):
    """Check the frame produced from ``np.arange(4)``; the target is unchecked."""
    result = dataframe_functions.make_forecasting_frame(
        x=np.arange(4), kind="test", max_timeshift=1, rolling_direction=1)
    frame, _unused_target = result

    wanted_frame = pd.DataFrame({
        "id": [1, 2, 3],
        "kind": ["test"] * 3,
        "value": [0., 1., 2.],
        "time": [0., 1., 2.],
    })

    produced_sorted = frame.sort_index(axis=1)
    wanted_sorted = wanted_frame.sort_index(axis=1)
    assert_frame_equal(produced_sorted, wanted_sorted)
def predict(
    self,
    forecast_length: int,
    future_regressor=[],
    just_point_forecast: bool = False,
):
    """Generate a forecast immediately following the dates supplied to .fit().

    A regressor is trained on tsfresh features of every training column and
    then applied recursively: each predicted row is appended to the data and
    used to build the features for the next step.

    Args:
        forecast_length (int): Number of periods of data to forecast ahead
        future_regressor: additional regressor; not used by this method
        just_point_forecast (bool): If True, return a pandas.DataFrame of
            just point forecasts

    Returns:
        Either a PredictionObject of forecasts and metadata, or
        if just_point_forecast == True, a dataframe of point forecasts

    Raises:
        ImportError: if tsfresh is not available.
    """
    if not _has_tsfresh:
        raise ImportError("Package tsfresh is required")
    predictStartTime = datetime.datetime.now()

    from tsfresh.utilities.dataframe_functions import make_forecasting_frame
    from tsfresh.utilities.dataframe_functions import impute as tsfresh_impute

    max_timeshift = self.max_timeshift
    regression_model = self.regression_model
    feature_selection = self.feature_selection

    sktraindata = self.df_train.copy()

    X = pd.DataFrame()
    y = pd.DataFrame()
    for counter, column in enumerate(sktraindata.columns):
        df_shift, current_y = make_forecasting_frame(
            sktraindata[column],
            kind="time_series",
            max_timeshift=max_timeshift,
            rolling_direction=1,
        )
        current_X = extract_features(
            df_shift,
            column_id="id",
            column_sort="time",
            column_value="value",
            impute_function=tsfresh_impute,
            show_warnings=False,
            default_fc_parameters=EfficientFCParameters(),
            n_jobs=1,
        )
        # Prefix with the series position so features of different series
        # never collide.
        current_X = current_X.rename(
            columns=lambda name, prefix=str(counter): prefix + '_' + name)
        X = pd.concat([X, current_X], axis=1)
        y = pd.concat([y, current_y], axis=1)

    # drop constant features
    X = X.loc[:, X.apply(pd.Series.nunique) != 1]
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(0)
    y = y.ffill().bfill()

    # Feature selection keeps X a DataFrame with its original column labels:
    # the forecasting loop below selects features by those labels, and the
    # raw ``transform`` output (ndarray / integer-labelled frame) broke that.
    if feature_selection == 'Variance':
        from sklearn.feature_selection import VarianceThreshold

        sel = VarianceThreshold(threshold=(0.15)).fit(X)
        X = X.loc[:, sel.get_support()]
    if feature_selection == 'Percentile':
        from sklearn.feature_selection import SelectPercentile, chi2

        sel = SelectPercentile(chi2, percentile=20).fit(X, y[y.columns[0]])
        X = X.loc[:, sel.get_support()]
    if feature_selection == 'DecisionTree':
        from sklearn.tree import DecisionTreeRegressor
        from sklearn.feature_selection import SelectFromModel

        clf = DecisionTreeRegressor().fit(X, y)
        X = X.loc[:, SelectFromModel(clf, prefit=True).get_support()]
    if feature_selection == 'Lasso':
        from sklearn.linear_model import MultiTaskLasso
        from sklearn.feature_selection import SelectFromModel

        clf = MultiTaskLasso(max_iter=2000).fit(X, y)
        X = X.loc[:, SelectFromModel(clf, prefit=True).get_support()]

    # Drop first line
    X = X.iloc[1:, ]
    y = y.iloc[1:]

    index = self.create_forecast_index(forecast_length=forecast_length)

    if regression_model == 'ElasticNet':
        from sklearn.linear_model import MultiTaskElasticNet

        regr = MultiTaskElasticNet(alpha=1.0, random_state=self.random_seed)
    elif regression_model == 'DecisionTree':
        from sklearn.tree import DecisionTreeRegressor

        regr = DecisionTreeRegressor(random_state=self.random_seed)
    elif regression_model == 'MLP':
        from sklearn.neural_network import MLPRegressor

        regr = MLPRegressor(
            hidden_layer_sizes=(10, 25, 10),
            verbose=self.verbose_bool,
            max_iter=200,
            activation='tanh',
            solver='lbfgs',
            random_state=self.random_seed,
        )
    elif regression_model == 'KNN':
        from sklearn.multioutput import MultiOutputRegressor
        from sklearn.neighbors import KNeighborsRegressor

        # KNeighborsRegressor is deterministic and accepts no random_state;
        # passing one raised TypeError.
        regr = MultiOutputRegressor(KNeighborsRegressor())
    elif regression_model == 'Adaboost':
        from sklearn.multioutput import MultiOutputRegressor
        from sklearn.ensemble import AdaBoostRegressor

        regr = MultiOutputRegressor(AdaBoostRegressor(n_estimators=200))
    else:
        regression_model = 'RandomForest'
        from sklearn.ensemble import RandomForestRegressor

        regr = RandomForestRegressor(
            random_state=self.random_seed,
            n_estimators=1000,
            verbose=self.verbose,
        )
    regr.fit(X, y)

    combined_index = self.df_train.index.append(index)
    forecast = pd.DataFrame()
    # Positional column labels so predicted rows (labelled 0..n-1) line up
    # with the training columns when appended.
    sktraindata.columns = list(range(len(sktraindata.columns)))

    for _ in range(forecast_length):
        x_dat = pd.DataFrame()
        for counter, column in enumerate(sktraindata.columns):
            df_shift, current_y = make_forecasting_frame(
                sktraindata.tail(max_timeshift)[column],
                kind="time_series",
                max_timeshift=max_timeshift,
                rolling_direction=1,
            )
            current_X = extract_features(
                df_shift,
                column_id="id",
                column_sort="time",
                column_value="value",
                impute_function=tsfresh_impute,
                show_warnings=False,
                n_jobs=1,
                default_fc_parameters=EfficientFCParameters(),
            )
            current_X = current_X.rename(
                columns=lambda name, prefix=str(counter): prefix + '_' + name)
            x_dat = pd.concat([x_dat, current_X], axis=1)

        # Same features, same order, as the model was trained on.
        x_dat = x_dat[X.columns]
        rfPred = pd.DataFrame(regr.predict(x_dat.tail(1).values))

        forecast = pd.concat([forecast, rfPred], axis=0, ignore_index=True)
        sktraindata = pd.concat([sktraindata, rfPred], axis=0,
                                ignore_index=True)
        # Restore the date index dropped by ignore_index.
        sktraindata.index = combined_index[:len(sktraindata.index)]

    forecast.columns = self.column_names
    forecast.index = index

    if just_point_forecast:
        return forecast

    upper_forecast, lower_forecast = Point_to_Probability(
        self.df_train,
        forecast,
        prediction_interval=self.prediction_interval,
    )
    predict_runtime = datetime.datetime.now() - predictStartTime
    prediction = PredictionObject(
        model_name=self.name,
        forecast_length=forecast_length,
        forecast_index=forecast.index,
        forecast_columns=forecast.columns,
        lower_forecast=lower_forecast,
        forecast=forecast,
        upper_forecast=upper_forecast,
        prediction_interval=self.prediction_interval,
        predict_runtime=predict_runtime,
        fit_runtime=self.fit_runtime,
        model_parameters=self.get_params(),
    )
    return prediction
def extract_tsfresh_features(timeseries, window_size, threshold):
    """Extract, clean and select rolling-window tsfresh features.

    Args:
        timeseries: Series handed to ``make_forecasting_frame``.
        window_size: Rolling window length; also the number of leading rows
            (minus one) discarded from the features and target.
        threshold: Maximum tolerated fraction of NaN values per feature
            column; columns above it are dropped.

    Returns:
        ``(features, False)`` when ``select_features`` keeps at least one
        feature, otherwise ``(None, True)``.
    """
    # Roll the series into windows.
    df_shift, y = make_forecasting_frame(timeseries,
                                         kind="x",
                                         max_timeshift=window_size,
                                         rolling_direction=1)

    # Calculate all features.
    all_features = extract_features(
        df_shift,
        column_id="id",
        column_sort="time",
        column_value="value",
        default_fc_parameters=EfficientFCParameters(),
        impute_function=None,
        disable_progressbar=True,
        show_warnings=False,
        n_jobs=8)

    # Drop the first ``window_size - 1`` rows.
    all_features = all_features.iloc[window_size - 1:]
    y = y.iloc[window_size - 1:]

    # Drop columns which are entirely NaN.
    all_features = all_features.dropna(axis=1, how='all')

    # Drop columns whose NaN fraction exceeds the threshold. A boolean mask
    # replaces the positional ``nan_percentage[i]`` lookups, which are
    # deprecated on a label-indexed Series.
    row_count = all_features.shape[0]
    nan_percentage = (row_count - all_features.count()) / row_count
    all_features = all_features.loc[:, ~(nan_percentage > threshold)]

    # Drop constant features.
    all_features = all_features.loc[
        :, all_features.apply(pd.Series.nunique) != 1]

    # Treat infinities as missing. The result must be assigned: ``replace``
    # is not in-place, so the original call had no effect.
    all_features = all_features.replace([np.inf, -np.inf], np.nan)
    # A column consisting only of infinities is now all-NaN and has no mean
    # to fill with; remove it so no NaN reaches the selection step.
    all_features = all_features.dropna(axis=1, how='all')
    if all_features.isnull().values.any():
        all_features = all_features.fillna(all_features.mean())

    # Filter out unimportant features.
    all_features = select_features(all_features, y)
    kind_to_fc_parameters = tsfresh.feature_extraction.settings.from_columns(
        all_features)
    if not kind_to_fc_parameters:
        return None, True

    # Re-extract on a tiny slice with the derived settings to learn which
    # column names those settings actually reproduce.
    temp = extract_features(df_shift.iloc[:3, :],
                            column_id="id",
                            column_sort="time",
                            column_value="value",
                            kind_to_fc_parameters=kind_to_fc_parameters,
                            impute_function=None,
                            disable_progressbar=True,
                            show_warnings=False,
                            n_jobs=8)
    # ``Index & Index`` is no longer a set intersection in pandas 2.
    shared_columns = temp.columns.intersection(all_features.columns)
    return all_features[shared_columns], False
def split_into_train_test_out_tsfresh(data, in_num):
    """Build tsfresh training features, targets and the final test row.

    The data is rolled by one position before the forecasting frame is
    built. Features are extracted with ``EfficientFCParameters`` and reduced
    to a fixed list of columns.

    Returns:
        ``(x_train, y_train, x_test)``: all selected feature rows but the
        last, the targets without their last entry, and the last feature row.
    """
    rolled_data = np.roll(data, -1)  # roll the data once

    frames, targets = make_forecasting_frame(rolled_data,
                                             kind="price",
                                             max_timeshift=in_num,
                                             rolling_direction=1)

    features = extract_features(frames,
                                column_id="id",
                                column_sort="time",
                                column_value="value",
                                impute_function=impute,
                                show_warnings=False,
                                disable_progressbar=False,
                                n_jobs=5,
                                chunksize=1,
                                default_fc_parameters=EfficientFCParameters())

    # The subset of tsfresh columns kept for the model.
    wanted_columns = [
        'value__agg_autocorrelation__f_agg_"mean"',
        'value__agg_autocorrelation__f_agg_"median"',
        'value__agg_autocorrelation__f_agg_"var"',
        'value__autocorrelation__lag_0',
        'value__autocorrelation__lag_1',
        'value__autocorrelation__lag_2',
        'value__binned_entropy__max_bins_10',
        'value__fft_aggregated__aggtype_"variance"',
        'value__fft_coefficient__coeff_0__attr_"abs"',
        'value__fft_coefficient__coeff_0__attr_"real"',
        'value__fft_coefficient__coeff_1__attr_"abs"',
        'value__fft_coefficient__coeff_1__attr_"angle"',
        'value__fft_coefficient__coeff_1__attr_"imag"',
        'value__fft_coefficient__coeff_1__attr_"real"',
        'value__first_location_of_maximum',
        'value__large_standard_deviation__r_0.05',
        'value__large_standard_deviation__r_0.1',
        'value__large_standard_deviation__r_0.15000000000000002',
        'value__large_standard_deviation__r_0.2',
        'value__large_standard_deviation__r_0.25',
        'value__linear_trend__attr_"intercept"',
        'value__linear_trend__attr_"pvalue"',
        'value__linear_trend__attr_"rvalue"',
        'value__linear_trend__attr_"slope"',
        'value__longest_strike_above_mean',
        'value__longest_strike_below_mean',
        'value__max_langevin_fixed_point__m_3__r_30',
        'value__maximum',
        'value__mean',
        'value__mean_abs_change',
        'value__mean_change',
        'value__median',
        'value__minimum',
        'value__number_cwt_peaks__n_5',
        'value__partial_autocorrelation__lag_0',
        'value__partial_autocorrelation__lag_1',
        'value__partial_autocorrelation__lag_2',
        'value__standard_deviation',
        'value__sum_values',
        'value__variance',
    ]
    selected = features[wanted_columns]

    # Everything except the final row trains; the final row is the test row.
    return selected[:-1], targets[:-1], selected[-1:]
def get_tsfresh_features(df=None, max_timeshift=10, n_jobs=10):
    """Compute rolling tsfresh features for a fixed set of columns.

    Args:
        df: Source frame. Missing values are forward-filled and index levels
            0 and 1 are dropped before rolling. Required despite the ``None``
            default.
        max_timeshift: Rolling window length. Values above 10 switch to a
            different feature set; it is also used as the ``_p<N>`` suffix
            on the output columns.
        n_jobs: Worker count forwarded to ``extract_features``.

    Returns:
        The per-column feature frames concatenated with the source column
        name as the outer key; every column name carries a
        ``_p{max_timeshift}`` suffix.

    Raises:
        ValueError: if ``df`` is None.
    """
    if df is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("get_tsfresh_features requires a DataFrame, got df=None")

    from tsfresh.utilities.dataframe_functions import make_forecasting_frame
    from tsfresh.utilities.dataframe_functions import impute
    from tsfresh.feature_extraction import extract_features
    import pandas as pd

    if max_timeshift > 10:
        d = {
            'skewness': None,
            'kurtosis': None,
            'quantile': [{'q': 0.05}, {'q': 0.95}],
            'linear_trend': [{'attr': 'slope'}],
            'mean_abs_change': None,
            'mean_second_derivative_central': None,
            'fft_aggregated': [
                {'aggtype': "centroid"},
                {'aggtype': "variance"},
                {'aggtype': "skew"},
                {'aggtype': "kurtosis"},
            ],
        }
    else:
        d = {
            'mean': None,
            'maximum': None,
            'minimum': None,
            'mean_abs_change': None,
            'mean_second_derivative_central': None,
        }

    # ``fillna(method='ffill')`` is deprecated; ``ffill()`` is the equivalent.
    df = df.ffill()
    df_tsfresh = df.reset_index(level=[0, 1], drop=True)

    cols_to_calc = [
        "rougher.input.feed_fe",
        "rougher.input.feed_zn",
        "rougher.input.feed_sol",
        "rougher.input.feed_pb",
        "rougher.input.feed_rate",
        'rougher.input.floatbank11_xanthate',
        'rougher.input.floatbank10_copper_sulfate',
        'rougher.state.floatbank10_b_air',
        "secondary_cleaner.state.floatbank5_a_air",
        "primary_cleaner.input.copper_sulfate",
        "primary_cleaner.state.floatbank8_a_air",
        "primary_cleaner.input.depressant",
        "primary_cleaner.input.feed_size",
        "primary_cleaner.input.xanthate",
    ]

    dfs = {}
    for c in cols_to_calc:
        print(f'Working on {c}...')
        df_shift, _ = make_forecasting_frame(df_tsfresh[c],
                                             kind="price",
                                             max_timeshift=max_timeshift,
                                             rolling_direction=1)
        dfs[c] = extract_features(df_shift,
                                  column_id="id",
                                  column_sort="time",
                                  column_value="value",
                                  impute_function=impute,
                                  show_warnings=False,
                                  default_fc_parameters=d,
                                  n_jobs=n_jobs)

    df_tsfresh_feats = pd.concat(dfs, keys=list(dfs.keys()))
    df_tsfresh_feats.columns = [
        f'{i}_p{max_timeshift}' for i in df_tsfresh_feats.columns
    ]
    return df_tsfresh_feats
import pandas as pd
import tsfresh
from tsfresh.utilities.dataframe_functions import make_forecasting_frame

# Minimal demonstration of make_forecasting_frame on a four-point series.
# Alternative input kept for reference:
# df = pd.read_csv('../data/input/international-airline-passengers.csv', index_col=0)
# df.index = range(len(df))
df = pd.DataFrame.from_dict({'y': [1, 2, 3, 4]})
print(df)

# x is the rolled input frame, y the value to predict for each window.
x, y = make_forecasting_frame(
    x=df['y'],
    kind='ts',
    max_timeshift=1,
    rolling_direction=1,
)
print(x)
print(y)
# [df] + [df.shift(i).rename(columns=lambda c: "{}_lag_{}".format(c, i)) # for i in range(1, max_lags)], axis=1).dropna() # rolled = roll_time_series(df, # column_id='id', # column_sort='timestamp', # column_kind='kind', # rolling_direction=1, # max_timeshift=2) scaled_value = MinMaxScaler().fit_transform( df.value.values.reshape(-1, 1))[:, 0] scaled_value = pd.Series(scaled_value, index=df.index, name=df.value.name) df_shift, y = make_forecasting_frame(scaled_value, kind="kind", max_timeshift=10, rolling_direction=1) if False: extract_start = time.time() X = extract_features(df_shift, column_id="id", column_sort="time", column_value="value", impute_function=impute, n_jobs=8, show_warnings=False) extract_end = time.time() print("Extraction time: {}".format(extract_end - extract_start)) raw_feat_num = X.shape[1] print("Extracted {} features.".format(raw_feat_num))