def gen_dt_feature(self):
    '''
    Generate datetime feature for each row. Currently we generate following features:

    | "MINUTE": The minute of the time stamp.
    | "DAY": The day of the time stamp.
    | "DAYOFYEAR": The ordinal day of the year of the time stamp.
    | "HOUR": The hour of the time stamp.
    | "WEEKDAY": The day of the week of the time stamp, Monday=0, Sunday=6.
    | "WEEKOFYEAR": The ordinal week of the year of the time stamp.
    | "MONTH": The month of the time stamp.
    | "IS_AWAKE": Bool value indicating whether it belongs to awake hours for the time stamp,
    | True for hours between 6A.M. and 1A.M.
    | "IS_BUSY_HOURS": Bool value indicating whether it belongs to busy hours for the time
    | stamp, True for hours between 7A.M. and 10A.M. and hours between 4P.M. and 8P.M.
    | "IS_WEEKEND": Bool value indicating whether it belongs to weekends for the time stamp,
    | True for Saturdays and Sundays.

    The generated column names have the form "<FEATURE>(<dt_col>)" and are appended to
    `self.feature_col`. A name that is already registered is not appended again, so
    calling this method more than once does not duplicate feature columns.

    :return: the tsdataset instance.
    '''
    from zoo.chronos.data.utils.feature import TIME_FEATURE, \
        ADDITIONAL_TIME_FEATURE_HOUR, ADDITIONAL_TIME_FEATURE_WEEKDAY

    # Features are generated separately on the rows of each id, then the per-id
    # frames are stitched back together in `_id_list` order.
    per_id_frames = [
        generate_dt_features(input_df=self.df[self.df[self.id_col] == id_name],
                             dt_col=self.dt_col)
        for id_name in self._id_list
    ]
    self.df = pd.concat(per_id_frames)

    suffix = "({})".format(self.dt_col)
    increased_attributes = list(TIME_FEATURE) +\
        list(ADDITIONAL_TIME_FEATURE_HOUR) +\
        list(ADDITIONAL_TIME_FEATURE_WEEKDAY)

    # Register only names that are not known yet; a plain `+=` of every name would
    # list each datetime feature twice after a second call.
    new_names = []
    for attr in increased_attributes:
        name = attr + suffix
        if name not in self.feature_col and name not in new_names:
            new_names.append(name)
    self.feature_col += new_names
    return self
def gen_dt_feature(self):
    '''
    Generate datetime feature for each row. Currently we generate following features:
    "MINUTE", "DAY", "DAYOFYEAR", "HOUR", "WEEKDAY", "WEEKOFYEAR", "MONTH", "IS_AWAKE",
    "IS_BUSY_HOURS", "IS_WEEKEND"

    Each generated column is registered in `self.feature_col` under the name
    "<FEATURE>(<dt_col>)". Names that are already registered are skipped, so a repeated
    call does not add duplicate entries.

    :return: the tsdataset instance.

    Note: it should be called before scale if needed.
    '''
    from zoo.chronos.data.utils.feature import TIME_FEATURE, \
        ADDITIONAL_TIME_FEATURE_HOUR, ADDITIONAL_TIME_FEATURE_WEEKDAY

    # Run the feature generator on each id's rows and concatenate the results in
    # `_id_list` order.
    frames = []
    for id_name in self._id_list:
        rows_of_id = self.df[self.df[self.id_col] == id_name]
        frames.append(generate_dt_features(input_df=rows_of_id, dt_col=self.dt_col))
    self.df = pd.concat(frames)

    increased_attributes = list(TIME_FEATURE) +\
        list(ADDITIONAL_TIME_FEATURE_HOUR) +\
        list(ADDITIONAL_TIME_FEATURE_WEEKDAY)
    candidate_names = [
        attr + "({})".format(self.dt_col) for attr in increased_attributes
    ]

    # Appending every candidate unconditionally would register each feature again on
    # every call; keep only the names that are not present yet (order preserved).
    fresh_names = []
    for name in candidate_names:
        if name not in self.feature_col and name not in fresh_names:
            fresh_names.append(name)
    self.feature_col += fresh_names
    return self
def test_generate_dt_features(self):
    '''generate_dt_features keeps the input columns and adds one column per feature.'''
    timestamps = pd.date_range('1/1/2019', periods=8)
    noise = np.random.randn(8, 3)
    frame = pd.DataFrame({
        "datetime": timestamps,
        "values": noise[:, 0],
        "A": noise[:, 1],
        "B": noise[:, 2],
    })

    result = generate_dt_features(frame, dt_col="datetime")

    # Columns that were already in the input frame.
    original_columns = {'A', 'B', 'values', 'datetime'}
    # Columns expected to be added, named "<FEATURE>(<dt_col>)".
    generated_columns = {
        'IS_AWAKE(datetime)',
        'IS_BUSY_HOURS(datetime)',
        'HOUR(datetime)',
        'DAY(datetime)',
        'IS_WEEKEND(datetime)',
        'WEEKDAY(datetime)',
        'MONTH(datetime)',
        'DAYOFYEAR(datetime)',
        'WEEKOFYEAR(datetime)',
        'MINUTE(datetime)',
    }
    assert set(result.columns) == original_columns | generated_columns
def gen_dt_feature(self, features="auto", one_hot_features=None):
    '''
    Generate datetime feature(s) for each record.

    :param features: str or list, states which feature(s) will be generated. If the value
           is set to be a str, it should be one of "auto" or "all". For "auto", a subset
           of datetime features will be generated under the consideration of the sampling
           frequency of your data. For "all", the whole set of datetime features will be
           generated. If the value is set to be a list, the list should contain the
           features you want to generate. A table of all datetime features and their
           description is listed below. The value defaults to "auto".
    :param one_hot_features: list, states which feature(s) will be generated as
           one-hot-encoded feature. The value defaults to None, which means no features
           will be generated as one-hot-encoded.

    | "MINUTE": The minute of the time stamp.
    | "DAY": The day of the time stamp.
    | "DAYOFYEAR": The ordinal day of the year of the time stamp.
    | "HOUR": The hour of the time stamp.
    | "WEEKDAY": The day of the week of the time stamp, Monday=0, Sunday=6.
    | "WEEKOFYEAR": The ordinal week of the year of the time stamp.
    | "MONTH": The month of the time stamp.
    | "IS_AWAKE": Bool value indicating whether it belongs to awake hours for the time stamp,
    | True for hours between 6A.M. and 1A.M.
    | "IS_BUSY_HOURS": Bool value indicating whether it belongs to busy hours for the time
    | stamp, True for hours between 7A.M. and 10A.M. and hours between 4P.M. and 8P.M.
    | "IS_WEEKEND": Bool value indicating whether it belongs to weekends for the time stamp,
    | True for Saturdays and Sundays.

    The names collected from the feature generator are appended to `self.feature_col`;
    a name is appended at most once, even when several ids report it or when this
    method is called repeatedly.

    :return: the tsdataset instance.
    '''
    # One accumulator list is shared by every per-id call below.
    # NOTE(review): presumably generate_dt_features appends the names of the columns
    # it creates to this list -- confirm against the helper.
    features_generated = []
    per_id_frames = [
        generate_dt_features(input_df=self.df[self.df[self.id_col] == id_name],
                             dt_col=self.dt_col,
                             features=features,
                             one_hot_features=one_hot_features,
                             freq=self._freq,
                             features_generated=features_generated)
        for id_name in self._id_list
    ]
    self.df = pd.concat(per_id_frames)

    # Because the accumulator is shared across ids it may hold the same name once per
    # id; register each name a single time, keeping first-seen order.
    new_names = []
    for name in features_generated:
        if name not in self.feature_col and name not in new_names:
            new_names.append(name)
    self.feature_col += new_names
    return self
def test_generate_dt_features(self):
    '''With features="auto" and a one-day frequency, check the resulting column set.'''
    timestamps = pd.date_range('1/1/2019', periods=8)
    noise = np.random.randn(8, 3)
    frame = pd.DataFrame({
        "datetime": timestamps,
        "values": noise[:, 0],
        "A": noise[:, 1],
        "B": noise[:, 2],
    })

    generator_kwargs = dict(
        dt_col="datetime",
        features="auto",
        one_hot_features=None,
        freq=pd.Timedelta("1D"),
        features_generated=[],
    )
    result = generate_dt_features(frame, **generator_kwargs)

    # Columns that were already in the input frame.
    original_columns = {'A', 'B', 'values', 'datetime'}
    # Columns expected from the "auto" selection at this frequency.
    generated_columns = {
        'DAY',
        'IS_WEEKEND',
        'WEEKDAY',
        'MONTH',
        'DAYOFYEAR',
        'WEEKOFYEAR',
        'YEAR',
    }
    assert set(result.columns) == original_columns | generated_columns