class CyclicTimerGenerator(DependentGenerator):
    """
    Generator of random waiting times following a cyclic activity profile.

    The generator keeps track of the expected level of activity of users
    over a cyclic time range. It stores, in ``self.profile``, the cumulative
    distribution ("cdf") of the activity weights per clock step, always
    starting at the current clock step. This allows to quickly draw random
    waiting times (expressed in number of clock steps) until the next event.

    It is mostly meant as a super class: normally only its child classes
    should be instantiated.
    """

    def __init__(self, clock, seed, config):
        """
        This should not be used directly, only through child classes.

        :type clock: Clock
        :param clock: the master clock driving this simulator. Its
            ``current_date`` and ``step_duration`` are read here, and this
            generator registers itself through
            ``clock.register_increment_listener``.

        :type seed: int
        :param seed: seed forwarded to the random number generator

        :param config: description of the cyclic profile. The attributes
            ``start_date``, ``profile`` (list of weights) and
            ``profile_time_steps`` are read here, ``duration()`` is used by
            :meth:`activity`.
        """
        DependentGenerator.__init__(self)
        self._state = RandomState(seed)
        self.config = config
        self.clock = clock

        # "macro" time shift: we shift the whole profile n times in the
        # future or the past until it overlaps with the current clock date
        cycle_duration = (pd.Timedelta(config.profile_time_steps)
                          * len(config.profile))
        init_date = latest_date_before(
            starting_date=config.start_date,
            upper_bound=clock.current_date,
            time_step=cycle_duration)

        # Un-scaled weight profile. We artificially add a trailing nan so
        # that the up-sampling below also expands the last profile element.
        profile_idx = pd.date_range(start=init_date,
                                    freq=config.profile_time_steps,
                                    periods=len(config.profile) + 1)
        profile_ser = pd.Series(data=config.profile + [np.nan],
                                index=profile_idx)

        # Scaled weight profile, s.t. one clock step == one profile value.
        # ffill() is the supported spelling of the former Resampler.pad(),
        # which no longer exists in recent pandas versions. The trailing
        # nan sentinel is dropped again with [:-1].
        profile_ser = profile_ser.resample(
            rule=clock.step_duration).ffill()[:-1]

        self.n_time_bin = profile_ser.shape[0]

        profile_cdf = (profile_ser / profile_ser.sum()).cumsum()
        self.profile = pd.DataFrame({
            "cdf": profile_cdf,

            # for debugging
            "timeframe": np.arange(len(profile_cdf))
        })

        # "micro" time shift: we step forward along the profile until it is
        # aligned with the current date
        while self.profile.index[0] < clock.current_date:
            self.increment()

        # makes sure we'll get notified when the clock goes forward
        clock.register_increment_listener(self)

    def increment(self):
        """
        Increment the time generator by 1 step.

        This has as effect to move the cdf of one step to the left, decrease
        all values by the value of the original first entry, and place the
        previous first entry at the end of the cdf, with value 1.
        """
        self.profile["cdf"] -= self.profile["cdf"].iloc[0]

        # rotate: the first row becomes the last one
        self.profile = pd.concat(
            [self.profile.iloc[1:], self.profile.iloc[:1]])
        self.profile.loc[self.profile.index[-1], "cdf"] = 1

    def generate(self, observations):
        """
        Generate random waiting times, based on some observed activity
        levels. The higher the level of activity, the shorter the waiting
        times will be.

        :type observations: Pandas Series
        :param observations: contains an array of floats (activity levels)

        :return: Pandas Series of float timers (in number of clock steps),
            in the same order and with the same index as ``observations``,
            including when that index contains duplicate labels. Entries
            whose activity is NaN or not strictly positive get a NaN timer.
        """
        levels = np.asarray(observations, dtype=float)

        # comparisons with NaN are simply False: those entries match neither
        # mask and keep a NaN timer
        with np.errstate(invalid="ignore"):
            low_mask = (levels > 0) & (levels <= 2)
            high_mask = levels > 2

        # Timers are filled positionally, which keeps the result aligned
        # with the input without any index-based re-alignment (the latter
        # fails on duplicate labels).
        timers = np.full(levels.shape[0], np.nan)
        cdf = self.profile["cdf"]

        # activities less often than twice per cycle length
        if low_mask.any():
            draw = self._state.uniform(size=int(low_mask.sum()))

            # A uniform [0, 2/activity] yields an expected freqs ==
            # 1/activity == average period between story.
            # => n_cycles is the number of full timer cycles from now until
            # next story. It's typically not an integer and possibly > 1
            # since we can have on average less than 1 activity per cycle
            # of this timer.
            n_cycles = 2 * draw / levels[low_mask]

            timer_slots = n_cycles % 1
            n_cycles_int = n_cycles - timer_slots

            timers[low_mask] = (cdf.searchsorted(timer_slots)
                                + self.n_time_bin * n_cycles_int)

        if high_mask.any():
            # A beta(1, activity-1) will yield expected frequencies of
            # 1/(1+activity-1) == 1/activity == average period between
            # story.
            # This just stops to work for activities < 1, or even close to
            # one => we use the uniform mechanism above for activities <= 2
            # and rely on betas here for expected frequencies of 2 per
            # cycle or higher.
            # One vectorised call: the values are drawn element by element,
            # in the order of the observations.
            timer_slots = self._state.beta(1, levels[high_mask] - 1)
            timers[high_mask] = cdf.searchsorted(timer_slots, side="left")

        # Not sure about that one, there seems to be a bias somewhere that
        # systematically generates too large timers. Maybe it's a rounding
        # effect of searchsorted() or so. Or a bug elsewhere ?
        # (NaN timers are left untouched by np.maximum.)
        timers = np.maximum(0, timers - 1)

        return pd.Series(timers, index=observations.index)

    def activity(self, n, per):
        """
        Convert a number of stories per time period into an activity level.

        :param n: number of stories

        :param per: time period for that number of stories
        :type per: pd.Timedelta

        :return: the activity level corresponding to the specified number
            of n executions per time period. A warning is logged when the
            implied period between two stories is shorter than the clock
            step.
        """
        scale = self.config.duration().total_seconds() / per.total_seconds()
        activity = n * scale

        # n == 0 means "never": there is no period to compare to the clock
        # step (and computing it would divide by zero)
        if n != 0:
            requested_period = pd.Timedelta(seconds=per.total_seconds() / n)
            if requested_period < self.clock.step_duration:
                logging.warning(
                    "Warning: Creating activity level for {} stories per "
                    "{} =>  activity is {} but period is {}, which is "
                    "shorter than the clock period ({}). This clock "
                    "cannot keep up with such rate and less events will be"
                    " produced".format(n, per, activity, requested_period,
                                       self.clock.step_duration))

        return activity