Example #1
0
class CyclicTimerGenerator(DependentGenerator):
    """Generate random waiting times that follow a cyclic activity profile.

    The generator keeps track of the expected level of activity over a
    cyclic time range. The profile is stored as a cumulative distribution
    (one value per clock step) that always starts at the current time step:
    every call to :meth:`increment` rotates it by one step.

    This layout allows to quickly draw random waiting times (expressed in
    number of clock steps) until the next event, see :meth:`generate`.
    """

    def __init__(self, clock, seed, config):
        """
        Build the profile and align it with the current date of the clock.

        :type clock: Clock
        :param clock: the master clock driving this simulator. Its
            `current_date` and `step_duration` attributes are read here, and
            this generator registers itself as one of its increment
            listeners.

        :type seed: int
        :param seed: seed for the random number generator

        :param config: description of the cyclic profile. The attributes
            `start_date`, `profile_time_steps` and `profile` (sequence of
            un-normalized weights, one per profile time step) are read here;
            `duration()` is used by :meth:`activity`.
        """
        DependentGenerator.__init__(self)
        self._state = RandomState(seed)
        self.config = config
        self.clock = clock

        # "macro" time shift: we shift the whole profile n times in the future
        # or the past until it overlaps with the current clock date
        cycle_duration = (pd.Timedelta(config.profile_time_steps) *
                          len(config.profile))
        init_date = latest_date_before(
            starting_date=config.start_date,
            upper_bound=clock.current_date,
            time_step=cycle_duration)

        # Un-scaled weight profile. We artificially add a nan at the end to
        # force the up-scaling below to also expand the last element.
        # list() makes this an append for any kind of sequence (with a numpy
        # array, `+ [nan]` would be an element-wise addition).
        profile_idx = pd.date_range(start=init_date,
                                    freq=config.profile_time_steps,
                                    periods=len(config.profile) + 1)
        profile_ser = pd.Series(data=list(config.profile) + [np.nan],
                                index=profile_idx)

        # scaled weight profile, s.t. one clock step == one profile value.
        # ffill() is the supported spelling of the removed Resampler.pad();
        # the trailing artificial nan row is then dropped.
        profile_ser = profile_ser.resample(
            rule=clock.step_duration).ffill().iloc[:-1]

        self.n_time_bin = profile_ser.shape[0]

        profile_cdf = (profile_ser / profile_ser.sum()).cumsum()
        self.profile = pd.DataFrame({
            "cdf": profile_cdf,

            # for debugging
            "timeframe": np.arange(len(profile_cdf))
        })

        # "micro" time shift: we step forward along the profile until it is
        # aligned with the current date.
        # increment() rotates the rows together with their timestamps, so
        # after n_time_bin steps we are back to the initial order: iterating
        # further could never succeed. This happens when the clock date falls
        # strictly inside the last bin, in which case the next bin to start
        # is the first one of the following cycle, i.e. the full rotation we
        # end up with here.
        for _ in range(self.n_time_bin):
            if self.profile.index[0] >= clock.current_date:
                break
            self.increment()

        # makes sure we'll get notified when the clock goes forward
        clock.register_increment_listener(self)

    def increment(self):
        """
        Increment the time generator by 1 step.

        This has as effect to move the cdf of one step to the left, decrease
        all values by the value of the original first entry, and placing the
        previous first entry at the end of the cdf, with value 1.

        Note that the rows keep their original index label (timestamp) while
        being rotated.
        """

        self.profile["cdf"] -= self.profile["cdf"].iloc[0]

        self.profile = pd.concat(
            [self.profile.iloc[1:], self.profile.iloc[:1]])
        self.profile.loc[self.profile.index[-1], "cdf"] = 1

    def generate(self, observations):
        """Generate random waiting times, based on some observed activity
        levels. The higher the level of activity, the shorter the waiting
        times will be.

        :type observations: Pandas Series
        :param observations: activity levels (floats), i.e. expected number
            of events per cycle of this timer
        :return: Pandas Series of waiting times, in number of clock steps,
            with the same index and in the same order as `observations`
            (duplicate index values are supported). Entries whose activity
            is not strictly positive (or is nan) are nan.
        """

        activities = observations
        cdf = self.profile["cdf"]
        n_obs = activities.shape[0]

        # Boolean position masks. Everything below is assembled by position
        # rather than by label, so that the input order can be restored even
        # when the index contains duplicates.
        low_mask = ((activities > 0) & (activities <= 2)).values
        high_mask = (activities > 2).values

        pieces = []

        # activities less often than once per cycle length (drawn first, to
        # keep the sequence of random draws stable for a given seed)
        if low_mask.any():
            low_levels = activities.values[low_mask]
            draw = self._state.uniform(size=low_levels.shape[0])

            # A uniform [0, 2/activity] yields an expected freqs == 1/activity
            # == average period between story.
            # => n_cycles is the number of full timer cycles from now until
            # next story. It's typically not an integer and possibly be > 1
            # since we have on average less than 1 activity per cycle of this
            # timer.
            n_cycles = 2 * draw / low_levels

            timer_slots = n_cycles % 1
            n_cycles_int = n_cycles - timer_slots

            timers = (cdf.searchsorted(timer_slots) +
                      self.n_time_bin * n_cycles_int)
            pieces.append(pd.Series(timers, index=np.flatnonzero(low_mask)))

        if high_mask.any():
            high_levels = activities[high_mask]

            # A beta(1, activity-1) will yield expected frequencies of
            # 1/(1+activity-1) == 1/activity == average period between story.
            # This just stops to work for activities < 1, or even close to one
            # => we use the uniform mechanism above for activities <= 2 and
            # rely on betas here for expected frequencies of 2 per cycle or
            # higher
            timer_slots = high_levels.apply(
                lambda activity: self._state.beta(1, activity - 1))

            timers = cdf.searchsorted(timer_slots.values, side="left")
            pieces.append(pd.Series(timers, index=np.flatnonzero(high_mask)))

        if not pieces:
            # no positive activity at all: nothing is scheduled
            return pd.Series(np.nan, index=observations.index, dtype=float)

        all_timers = pd.concat(pieces)

        # Not sure about that one, there seem to be a bias somewhere that
        # systematically generates too large timer. Maybe it's a rounding
        # effect of searchsorted() or so. Or a bug elsewhere ?
        all_timers = (all_timers - 1).clip(lower=0)

        # puts the timers back in the order of the input observations (nan
        # where no timer was generated), then restores the original index
        all_timers = all_timers.reindex(np.arange(n_obs))
        all_timers.index = observations.index
        return all_timers

    def activity(self, n, per):
        """
        Convert a number of stories per time period into an activity level.

        :param n: number of stories
        :param per: time period for that number of stories
        :type per: pd.Timedelta
        :return: the activity level corresponding to the specified number of n
        executions per time period, i.e. n scaled to the duration of the
        profile cycle. A warning is logged if the resulting period between
        stories is shorter than one clock step.
        """

        per_seconds = per.total_seconds()
        scale = self.config.duration().total_seconds() / per_seconds
        activity = n * scale

        # A null (or negative) rate has no period to check. The comparison is
        # done in seconds so that a very small n, hence a huge period, does
        # not overflow pd.Timedelta.
        if n > 0:
            requested_seconds = per_seconds / n
            step_seconds = pd.Timedelta(
                self.clock.step_duration).total_seconds()

            if requested_seconds < step_seconds:
                requested_period = pd.Timedelta(seconds=requested_seconds)
                logging.warning(
                    "Warning: Creating activity level for {} stories per "
                    "{} =>  activity is {} but period is {}, which is "
                    "shorter  than the clock period ({}). This clock "
                    "cannot keep up with such rate and less events will be"
                    " produced".format(n, per, activity, requested_period,
                                       self.clock.step_duration))

        return activity