def get_time_steps(
    time_range: Union[str, datetime, Tuple[datetime, datetime]], specs: ModelSpecs
) -> pd.DatetimeIndex:
    """
    Get relevant datetime indices to build features for.

    The time_range parameter can be one or two datetime objects, in which case this
    function builds a DatetimeIndex directly:

    * one datetime: an index holding exactly that one time step.
    * a tuple (start, end): the steps from start (inclusive) up to end (exclusive),
      spaced by specs.frequency. The distance between start and end has to be a
      clean multiple of specs.frequency.

    It can also be one of two strings: "train" or "test". In this situation, this
    function creates a training or testing period from model specs
    (start_of_training, end_of_testing, ratio_training_testing_data and frequency).
    Both of these periods include their end point.

    Raises an Exception if time_range is none of the supported forms, or if a
    (start, end) tuple does not fit a multiple of the model frequency.

    TODO: we can check (and complain) if datetime objects are incompatible to
          specs.frequency, e.g.
              if round_datetime(dt, by_seconds=specs.frequency.total_seconds()) != dt:
                  raise Exception("%s is not compatible with frequency %s." % (dt, specs.frequency))
          We have to discuss if we allow to use any time to start intervals
          or rather 15:00, 15:15, 15:30 etc ...
    """
    # check valid time_range parameter
    is_single = isinstance(time_range, datetime)
    is_pair = (
        isinstance(time_range, tuple)
        and len(time_range) == 2  # avoids an IndexError on shorter tuples
        and all(isinstance(moment, datetime) for moment in time_range)
    )
    is_keyword = isinstance(time_range, str) and time_range in ("train", "test")
    if not (is_single or is_pair or is_keyword):
        raise Exception(
            "Goal for DateTimeIndex construction needs to be either a string ('train', 'test'),"
            " a tuple of two datetime objects or one datetime object."
        )

    pd_frequency = timedelta_to_pandas_freq_str(specs.frequency)

    # easy cases: one or two datetime objects
    if is_single:
        # No end-point trimming here: a half-open range from t to t would be empty.
        return pd.date_range(time_range, time_range, freq=pd_frequency)

    if is_pair:
        start, end = time_range
        if not timedelta_fits_into(specs.frequency, end - start):
            raise Exception(
                "Start & end period (%s to %s) does not cleanly fit a multiple of the model frequency (%s)"
                % (start, end, specs.frequency)
            )
        steps = pd.date_range(start, end, freq=pd_frequency)
        # Drop the end point by hand to get a left-closed range. The `closed=`
        # keyword of date_range is not available in pandas >= 2.0 and its
        # replacement `inclusive=` is not available before pandas 1.4.
        if len(steps) and steps[-1] == end:
            steps = steps[:-1]
        return steps

    # special cases: "train" or "test" - we have to calculate from model specs
    length_of_data = specs.end_of_testing - specs.start_of_training
    training_span = length_of_data * specs.ratio_training_testing_data
    frequency_in_seconds = specs.frequency.total_seconds()

    if time_range == "train":
        end_of_training = round_datetime(
            specs.start_of_training + training_span, frequency_in_seconds
        )
        logger.debug("Start of training: %s", specs.start_of_training)
        logger.debug("End of training: %s", end_of_training)
        return pd.date_range(
            specs.start_of_training, end_of_training, freq=pd_frequency
        )

    # time_range == "test": testing starts one frequency step after training ends
    start_of_testing = round_datetime(
        specs.start_of_training + training_span + specs.frequency,
        frequency_in_seconds,
    )
    logger.debug("Start of testing: %s", start_of_testing)
    logger.debug("End of testing: %s", specs.end_of_testing)
    return pd.date_range(start_of_testing, specs.end_of_testing, freq=pd_frequency)
def test_round_time_by_15min(dt):
    """Rounding ``dt`` to a 15-minute grid should give 14:45 on the same day.

    ``dt`` is supplied from outside this function (presumably a pytest
    fixture - confirm against the test setup).
    """
    quarter_hour_in_seconds = 60 * 15
    rounded = round_datetime(dt, by_seconds=quarter_hour_in_seconds)

    assert rounded.day == dt.day
    assert rounded.hour == 14
    assert rounded.minute == 45
def test_round_time_by_15min():
    """Rounding 2018-01-26 14:40 to a 15-minute grid should give 14:45, same day."""
    moment = datetime(2018, 1, 26, 14, 40)
    quarter_hour_in_seconds = 60 * 15

    rounded = round_datetime(moment, by_seconds=quarter_hour_in_seconds)

    assert rounded.day == moment.day
    assert rounded.hour == 14
    assert rounded.minute == 45
def test_round_time_by_hour(dt):
    """Rounding ``dt`` to a full hour should give 15:00 on the same day.

    ``dt`` is supplied from outside this function (presumably a pytest
    fixture - confirm against the test setup).
    """
    hour_in_seconds = 60 * 60
    rounded = round_datetime(dt, by_seconds=hour_in_seconds)

    assert rounded.day == dt.day
    assert rounded.hour == 15
    assert rounded.minute == 0
def test_round_time_by_hour():
    """Rounding 2018-01-26 14:40 to a full hour should give 15:00, same day."""
    moment = datetime(2018, 1, 26, 14, 40)
    hour_in_seconds = 60 * 60

    rounded = round_datetime(moment, by_seconds=hour_in_seconds)

    assert rounded.day == moment.day
    assert rounded.hour == 15
    assert rounded.minute == 0